library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(NbClust)
library(mice)
##
## Attaching package: 'mice'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(viridis)
## Loading required package: viridisLite
library(stargazer)
##
## Please cite as:
##
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
This markdown document contains the clustering procedure used to group countries based on a set of characteristics. The clustering is meant to reduce the number of parameters in the Bayesian hierarchical model for H5N1 infection fatality ratios, so that instead of country-year-specific parameters we have, at most, cluster-year-specific parameters.
The characteristics used for clustering (at least in principle) are indicators published by the World Bank:
Depending on how the variables behave as a group (collinearity), we might have to remove some of them in the actual clustering step.
The analysis will be structured as follows:
# Data: "Some" means the dataset only contains some countries (i.e. the ones on the WHO tracker)
library(readxl)
library(openxlsx)
# Load the country-level indicator table from the working directory.
# NOTE(review): missing values are coded as ".." in this CSV, so read.csv()
# parses most indicator columns as character (visible in the str() output);
# passing na.strings = ".." would avoid that -- confirm against later steps.
data_some <- read.csv(file = "./WorldData.csv")
# Inspect dimensions, column names and column types
str(object = data_some)
## 'data.frame': 217 obs. of 15 variables:
## $ Country : chr "Afghanistan" "Albania" "Algeria" "American Samoa" ...
## $ Population_Total : num 31833879 2902012 38723552 53286 77161 ...
## $ Current_Health_Expenditure_.GDP : chr "11.2426364" "6.305844005" "5.268283782" ".." ...
## $ GDP_per_Capita : chr "451.4874436" "4493.153886" "4545.498223" "11705.50962" ...
## $ UNDP_Multidim_Poverty_Ratio_.Pop : chr "55.9" "0.7" "1.4" ".." ...
## $ Poverty_Ratio_SocialLine_.Pop : chr ".." "19.14" "17.6" ".." ...
## $ Population_Rural : chr "23970533.76" "1308266.095" "11725015.71" "6643.761905" ...
## $ Prevalence.of.HIV.total....of.population.ages.15.49. : chr "0.1" "0.1" "0.1" ".." ...
## $ Incidence.of.tuberculosis..per.100.000.people.. : chr "187.8571429" "17.19047619" "69.47619048" "6.771428571" ...
## $ Diabetes.prevalence....of.population.ages.20.to.79. : chr "9.25" "6.5" "7.05" "20.3" ...
## $ Hospital_beds_1000ppl : chr "0.417894737" "2.953888889" "1.699230769" ".." ...
## $ Physicians_1000ppl : chr "0.245933333" "1.387615385" "1.4515" ".." ...
## $ Death.rate..crude..per.1.000.people...SP.DYN.CDRT.IN.: num 8.13 7.82 4.73 5.34 3.62 ...
## $ Rural.land.area..sq..km...AG.LND.TOTL.RU.K2. : chr "636173.2292" "27331.60161" "2305478.23" "198.2477988" ...
## $ Surface.area..sq..km...AG.SRF.TOTL.K2. : chr "652860" "28750" "2381740.4" "200" ...
# Column-wise overview; columns still stored as character only report
# length/class/mode rather than quantiles.
summary(object = data_some)
## Country Population_Total Current_Health_Expenditure_.GDP
## Length:217 Min. :1.044e+04 Length:217
## Class :character 1st Qu.:7.213e+05 Class :character
## Mode :character Median :6.015e+06 Mode :character
## Mean :3.334e+07
## 3rd Qu.:2.177e+07
## Max. :1.360e+09
## GDP_per_Capita UNDP_Multidim_Poverty_Ratio_.Pop
## Length:217 Length:217
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## Poverty_Ratio_SocialLine_.Pop Population_Rural
## Length:217 Length:217
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## Prevalence.of.HIV.total....of.population.ages.15.49.
## Length:217
## Class :character
## Mode :character
##
##
##
## Incidence.of.tuberculosis..per.100.000.people..
## Length:217
## Class :character
## Mode :character
##
##
##
## Diabetes.prevalence....of.population.ages.20.to.79. Hospital_beds_1000ppl
## Length:217 Length:217
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## Physicians_1000ppl Death.rate..crude..per.1.000.people...SP.DYN.CDRT.IN.
## Length:217 Min. : 1.266
## Class :character 1st Qu.: 5.988
## Mode :character Median : 7.739
## Mean : 7.992
## 3rd Qu.: 9.760
## Max. :16.861
## Rural.land.area..sq..km...AG.LND.TOTL.RU.K2.
## Length:217
## Class :character
## Mode :character
##
##
##
## Surface.area..sq..km...AG.SRF.TOTL.K2.
## Length:217
## Class :character
## Mode :character
##
##
##
# Recode the ".." missing-value placeholder as NA across the whole table.
# Only the matching cells are touched; column types are left as they are.
is.na(data_some) <- data_some == ".."
# Auto-print the full table to check the recoding
data_some
## Country Population_Total
## 1 Afghanistan 3.183388e+07
## 2 Albania 2.902012e+06
## 3 Algeria 3.872355e+07
## 4 American Samoa 5.328610e+04
## 5 Andorra 7.716143e+04
## 6 Angola 2.658364e+07
## 7 Antigua and Barbuda 8.684929e+04
## 8 Argentina 4.242417e+07
## 9 Armenia 3.038367e+06
## 10 Aruba 1.039730e+05
## 11 Australia 2.307926e+07
## 12 Austria 8.566906e+06
## 13 Azerbaijan 9.330278e+06
## 14 Bahamas, The 3.742072e+05
## 15 Bahrain 1.253301e+06
## 16 Bangladesh 1.563961e+08
## 17 Barbados 2.763531e+05
## 18 Belarus 9.476676e+06
## 19 Belgium 1.108831e+07
## 20 Belize 3.404262e+05
## 21 Benin 1.083207e+07
## 22 Bermuda 6.359400e+04
## 23 Bhutan 7.212714e+05
## 24 Bolivia 1.066967e+07
## 25 Bosnia and Herzegovina 3.657428e+06
## 26 Botswana 2.128600e+06
## 27 Brazil 1.977563e+08
## 28 British Virgin Islands 3.084833e+04
## 29 Brunei Darussalam 4.075456e+05
## 30 Bulgaria 7.240839e+06
## 31 Burkina Faso 1.785832e+07
## 32 Burundi 1.030133e+07
## 33 Cabo Verde 5.061323e+05
## 34 Cambodia 1.518629e+07
## 35 Cameroon 2.176649e+07
## 36 Canada 3.524247e+07
## 37 Cayman Islands 5.854210e+04
## 38 Central African Republic 4.656981e+06
## 39 Chad 1.390984e+07
## 40 Channel Islands 1.606329e+05
## 41 Chile 1.781892e+07
## 42 China 1.360274e+09
## 43 Colombia 4.639926e+07
## 44 Comoros 7.017382e+05
## 45 Congo, Dem. Rep. 7.763187e+07
## 46 Congo, Rep. 4.819028e+06
## 47 Costa Rica 4.685320e+06
## 48 Cote d'Ivoire 2.452501e+07
## 49 Croatia 4.151096e+06
## 50 Cuba 1.123552e+07
## 51 Curacao 1.496609e+05
## 52 Cyprus 1.179559e+06
## 53 Czechia 1.048752e+07
## 54 Denmark 5.639088e+06
## 55 Djibouti 9.826826e+05
## 56 Dominica 6.862286e+04
## 57 Dominican Republic 1.017830e+07
## 58 Ecuador 1.578160e+07
## 59 Egypt, Arab Rep. 9.574699e+07
## 60 El Salvador 6.133012e+06
## 61 Equatorial Guinea 1.349983e+06
## 62 Eritrea 3.018541e+06
## 63 Estonia 1.335158e+06
## 64 Eswatini 1.136427e+06
## 65 Ethiopia 9.945060e+07
## 66 Faroe Islands 4.984052e+04
## 67 Fiji 9.075215e+05
## 68 Finland 5.413875e+06
## 69 France 6.570100e+07
## 70 French Polynesia 2.722525e+05
## 71 Gabon 1.915904e+06
## 72 Gambia, The 2.113865e+06
## 73 Georgia 3.780039e+06
## 74 Germany 8.220985e+07
## 75 Ghana 2.744969e+07
## 76 Gibraltar 3.274762e+04
## 77 Greece 1.086521e+07
## 78 Greenland 5.653524e+04
## 79 Grenada 1.132736e+05
## 80 Guam 1.647058e+05
## 81 Guatemala 1.536801e+07
## 82 Guinea 1.135579e+07
## 83 Guinea-Bissau 1.709993e+06
## 84 Guyana 7.721852e+05
## 85 Haiti 1.022689e+07
## 86 Honduras 8.887120e+06
## 87 Hong Kong SAR, China 7.163986e+06
## 88 Hungary 9.897702e+06
## 89 Iceland 3.321802e+05
## 90 India 1.289124e+09
## 91 Indonesia 2.547262e+08
## 92 Iran, Islamic Rep. 8.042158e+07
## 93 Iraq 3.517508e+07
## 94 Ireland 4.643031e+06
## 95 Isle of Man 8.277552e+04
## 96 Israel 8.119471e+06
## 97 Italy 5.930080e+07
## 98 Jamaica 2.770056e+06
## 99 Japan 1.271280e+08
## 100 Jordan 8.558780e+06
## 101 Kazakhstan 1.773272e+07
## 102 Kenya 4.470787e+07
## 103 Kiribati 1.133770e+05
## 104 Korea, Dem. People's Rep. 2.532425e+07
## 105 Korea, Rep. 5.021169e+07
## 106 Kosovo 1.797406e+06
## 107 Kuwait 3.446383e+06
## 108 Kyrgyz Republic 5.907793e+06
## 109 Lao PDR 6.641498e+06
## 110 Latvia 2.047030e+06
## 111 Lebanon 5.449768e+06
## 112 Lesotho 2.086487e+06
## 113 Liberia 4.367451e+06
## 114 Libya 6.479366e+06
## 115 Liechtenstein 3.692490e+04
## 116 Lithuania 3.022694e+06
## 117 Luxembourg 5.493234e+05
## 118 Macao SAR, China 5.873381e+05
## 119 Madagascar 2.428609e+07
## 120 Malawi 1.632120e+07
## 121 Malaysia 3.014086e+07
## 122 Maldives 4.057633e+05
## 123 Mali 1.776445e+07
## 124 Malta 4.487680e+05
## 125 Marshall Islands 4.831267e+04
## 126 Mauritania 3.789597e+06
## 127 Mauritius 1.251696e+06
## 128 Mexico 1.173933e+08
## 129 Micronesia, Fed. Sts. 1.098817e+05
## 130 Moldova 2.783877e+06
## 131 Monaco 3.527705e+04
## 132 Mongolia 2.934363e+06
## 133 Montenegro 6.187698e+05
## 134 Morocco 3.371016e+07
## 135 Mozambique 2.558981e+07
## 136 Myanmar 5.033565e+07
## 137 Namibia 2.329369e+06
## 138 Nauru 1.073495e+04
## 139 Nepal 2.767959e+07
## 140 Netherlands 1.688068e+07
## 141 New Caledonia 2.691776e+05
## 142 New Zealand 4.565690e+06
## 143 Nicaragua 5.991892e+06
## 144 Niger 1.887536e+07
## 145 Nigeria 1.814662e+08
## 146 North Macedonia 1.925762e+06
## 147 Northern Mariana Islands 5.356476e+04
## 148 Norway 5.045191e+06
## 149 Oman 3.616521e+06
## 150 Pakistan 2.086671e+08
## 151 Palau 1.836024e+04
## 152 Panama 3.829469e+06
## 153 Papua New Guinea 8.285595e+06
## 154 Paraguay 6.028370e+06
## 155 Peru 3.029477e+07
## 156 Philippines 1.010424e+08
## 157 Poland 3.788447e+07
## 158 Portugal 1.044604e+07
## 159 Puerto Rico 3.537562e+06
## 160 Qatar 1.894866e+06
## 161 Romania 2.010098e+07
## 162 Russian Federation 1.440363e+08
## 163 Rwanda 1.116207e+07
## 164 Samoa 1.992849e+05
## 165 San Marino 3.256948e+04
## 166 Sao Tome and Principe 1.922880e+05
## 167 Saudi Arabia 2.672886e+07
## 168 Senegal 1.402219e+07
## 169 Serbia 7.142856e+06
## 170 Seychelles 9.307238e+04
## 171 Sierra Leone 6.750671e+06
## 172 Singapore 5.171274e+06
## 173 Sint Maarten (Dutch part) 3.641995e+04
## 174 Slovak Republic 5.410501e+06
## 175 Slovenia 2.055237e+06
## 176 Solomon Islands 6.107987e+05
## 177 Somalia 1.355602e+07
## 178 South Africa 5.510582e+07
## 179 South Sudan 9.816974e+06
## 180 Spain 4.610670e+07
## 181 Sri Lanka 2.106171e+07
## 182 St. Kitts and Nevis 4.681662e+04
## 183 St. Lucia 1.726166e+05
## 184 St. Martin (French part) 3.465767e+04
## 185 St. Vincent and the Grenadines 1.078582e+05
## 186 Sudan 3.894343e+07
## 187 Suriname 5.685874e+05
## 188 Sweden 9.685628e+06
## 189 Switzerland 8.086168e+06
## 190 Syrian Arab Republic 2.074978e+07
## 191 Tajikistan 8.348036e+06
## 192 Tanzania 5.013794e+07
## 193 Thailand 6.929041e+07
## 194 Timor-Leste 1.153769e+06
## 195 Togo 7.331587e+06
## 196 Tonga 1.060747e+05
## 197 Trinidad and Tobago 1.338867e+06
## 198 Tunisia 1.114840e+07
## 199 Turkiye 7.640729e+07
## 200 Turkmenistan 6.014850e+06
## 201 Turks and Caicos Islands 3.422029e+04
## 202 Tuvalu 1.044186e+04
## 203 Uganda 3.624449e+07
## 204 Ukraine 4.559534e+07
## 205 United Arab Emirates 7.573282e+06
## 206 United Kingdom 6.405129e+07
## 207 United States 3.149526e+08
## 208 Uruguay 3.344364e+06
## 209 Uzbekistan 3.006367e+07
## 210 Vanuatu 2.567376e+05
## 211 Venezuela, RB 2.866430e+07
## 212 Viet Nam 9.040825e+07
## 213 Virgin Islands (U.S.) 1.075409e+05
## 214 West Bank and Gaza 4.098806e+06
## 215 Yemen, Rep. 2.971416e+07
## 216 Zambia 1.555431e+07
## 217 Zimbabwe 1.406984e+07
## Current_Health_Expenditure_.GDP GDP_per_Capita
## 1 11.2426364 451.4874436
## 2 6.305844005 4493.153886
## 3 5.268283782 4545.498223
## 4 <NA> 11705.50962
## 5 6.650699064 42338.98155
## 6 2.918620436 2920.830059
## 7 5.110502896 16494.48587
## 8 9.141930304 10186.08225
## 9 8.956842105 3613.567524
## 10 <NA> 26699.88887
## 11 9.099192092 51793.94662
## 12 10.23421053 47081.92499
## 13 3.322631579 4964.763133
## 14 5.372967545 29374.01475
## 15 4.041130895 23586.559
## 16 2.232583046 1284.247251
## 17 6.910125054 18242.059
## 18 5.710526316 5873.224399
## 19 10.23358792 44523.6094
## 20 4.41062335 5861.354914
## 21 2.874702315 1048.749687
## 22 <NA> 103047.3282
## 23 3.781448113 2552.163178
## 24 5.837991112 2463.695863
## 25 9.19 4997.859457
## 26 6.033725739 6415.647824
## 27 8.618495314 8827.379512
## 28 <NA> <NA>
## 29 2.195953946 33617.41147
## 30 7.205789474 7940.040173
## 31 5.061279397 666.1661779
## 32 8.268308865 205.7231497
## 33 4.794364164 3540.203208
## 34 6.688282038 1368.061431
## 35 3.696850376 1448.111371
## 36 10.47599082 45454.25459
## 37 <NA> 82848.08103
## 38 5.556998728 420.9850319
## 39 4.606336832 728.1109852
## 40 <NA> 60868.8135
## 41 7.783749199 12636.47683
## 42 4.624752959 6716.493899
## 43 7.120626424 5879.763607
## 44 5.429377581 1402.927117
## 45 3.819649972 398.8776238
## 46 2.28647202 2497.356884
## 47 7.403352212 9773.96168
## 48 4.097755419 1766.442639
## 49 7.132105263 14152.42821
## 50 10.75198322 6312.32364
## 51 <NA> 18848.13906
## 52 6.574736842 29014.88979
## 53 7.306246758 20491.14397
## 54 9.984 58343.35722
## 55 3.219433508 1928.800896
## 56 5.298560495 7265.270601
## 57 4.58832904 6261.184773
## 58 6.694435119 5044.337852
## 59 4.691824813 2556.298077
## 60 8.708990499 3561.302153
## 61 2.067721851 10335.47608
## 62 5.106465929 497.5959148
## 63 6.280223394 18755.84175
## 64 7.456964293 3500.341109
## 65 4.125796168 543.7697356
## 66 <NA> 53369.6723
## 67 3.561239845 4493.671401
## 68 9.05 46712.24144
## 69 11.08443275 39864.08938
## 70 <NA> 21931.18675
## 71 2.933351829 7692.224918
## 72 4.132538331 690.0482551
## 73 7.933 3943.010288
## 74 11.12894034 44291.54316
## 75 3.916908603 1579.339022
## 76 <NA> <NA>
## 77 8.623236807 22332.7167
## 78 <NA> 44988.35503
## 79 5.206825106 8081.951869
## 80 <NA> 31953.8835
## 81 6.169551724 3510.418896
## 82 4.057360837 814.2916512
## 83 6.434563135 667.2952505
## 84 4.196538121 6167.413372
## 85 4.102177909 1262.361463
## 86 8.015170374 2084.924914
## 87 <NA> 38294.74006
## 88 7.256842105 14367.019
## 89 8.6245 58253.49624
## 90 3.452631579 1487.062035
## 91 2.829826406 3082.968832
## 92 6.082438895 4870.444621
## 93 3.599363903 4441.51616
## 94 8.169644046 66564.53919
## 95 <NA> 70635.62348
## 96 7.039473684 35610.8813
## 97 8.707 34733.2027
## 98 5.407712334 5001.91238
## 99 9.601578947 39424.89045
## 100 7.773431176 3574.815553
## 101 3.213157895 8632.755891
## 102 4.730462288 1324.684593
## 103 11.51789474 1629.494409
## 104 <NA> <NA>
## 105 6.38518803 26395.0069
## 106 <NA> 4008.653856
## 107 3.396623813 37437.46975
## 108 6.642631579 1072.10284
## 109 2.752456187 1589.848155
## 110 6.070526316 13954.98143
## 111 7.948515442 6588.222944
## 112 8.838850373 1004.059832
## 113 9.020622479 560.9782408
## 114 3.079954493 8956.304324
## 115 <NA> 150101.8561
## 116 6.52 15406.66863
## 117 5.931 109231.9904
## 118 <NA> 54881.66527
## 119 4.455707863 449.5098564
## 120 6.106377928 540.7598065
## 121 3.446505697 9052.014017
## 122 8.786842105 8098.60639
## 123 4.440895093 694.946244
## 124 8.703157895 25691.87858
## 125 14.86154471 4006.491575
## 126 3.211177388 1594.251035
## 127 4.710111667 8724.106087
## 128 5.737095081 9992.807482
## 129 12.13124807 2945.613811
## 130 8.874210526 3052.428947
## 131 4.333218199 177869.5426
## 132 4.320910391 3204.194369
## 133 8.819090909 7052.070075
## 134 5.038204319 3099.450184
## 135 5.87018835 533.2574143
## 136 3.420526316 965.586106
## 137 9.459853022 4554.559087
## 138 12.32979865 7677.608928
## 139 4.474736842 791.5462904
## 140 9.994736842 51275.65537
## 141 <NA> 32639.73246
## 142 9.119852894 37561.12662
## 143 7.607375998 1764.675457
## 144 5.214959746 487.9278815
## 145 3.729135237 2008.130896
## 146 7.051578947 5403.221187
## 147 <NA> 18822.71773
## 148 9.2235 83085.93364
## 149 3.025274057 19709.47279
## 150 2.573061365 1187.556146
## 151 11.46376545 12639.55519
## 152 7.55064239 11310.53566
## 153 2.409026196 2063.660702
## 154 6.072227704 4865.547919
## 155 4.910122168 5458.101783
## 156 4.0895 2508.895892
## 157 6.3085 13330.87094
## 158 9.7005 21907.97357
## 159 <NA> 28145.35544
## 160 2.664236132 71861.00433
## 161 5.305263158 9854.693726
## 162 5.348947368 10578.62365
## 163 7.303107914 642.6923054
## 164 5.317211703 3597.543938
## 165 7.049094476 54233.0566
## 166 7.683368382 1449.696875
## 167 4.625313283 23320.19909
## 168 4.188314012 1306.71208
## 169 8.864323339 6757.944275
## 170 4.677906664 14028.06898
## 171 10.65651126 754.0572643
## 172 3.794145396 53595.50361
## 173 <NA> 32449.08868
## 174 7.016842105 17283.66745
## 175 8.4255 23605.40865
## 176 5.568360793 1792.165577
## 177 <NA> 434.5345085
## 178 7.706607241 6477.800354
## 179 10.07183256 1375.877318
## 180 8.864402218 29285.69457
## 181 3.747368421 3014.031811
## 182 5.116900721 17946.60954
## 183 5.016750134 9510.273258
## 184 <NA> 20567.69883
## 185 4.192395736 7181.386048
## 186 5.042841596 1085.614915
## 187 5.464989663 6245.639271
## 188 9.821 52574.84705
## 189 10.43189089 78503.01779
## 190 3.700341392 1457.382506
## 191 6.247368421 762.7129909
## 192 4.868653122 846.6287275
## 193 3.65016665 5343.791368
## 194 6.83673939 1137.380449
## 195 4.729092272 754.9695488
## 196 4.750224276 3731.753657
## 197 5.323858072 17296.01771
## 198 5.897287995 3804.384555
## 199 4.683157895 9879.268667
## 200 4.680823804 5001.805236
## 201 <NA> 24992.06601
## 202 15.19517291 3759.338954
## 203 5.272838818 702.4227647
## 204 6.918947368 3125.154936
## 205 3.589794799 44112.01078
## 206 9.763402509 43234.43063
## 207 16.0682107 55979.98896
## 208 8.410401746 14018.9084
## 209 5.218947368 1795.468185
## 210 3.541446108 2751.688233
## 211 3.181720378 9716.628345
## 212 4.632116643 2247.165631
## 213 <NA> 38101.09984
## 214 <NA> 2789.594057
## 215 5.288947912 887.0500411
## 216 5.058898525 1252.39767
## 217 6.303795893 1289.531294
## UNDP_Multidim_Poverty_Ratio_.Pop Poverty_Ratio_SocialLine_.Pop
## 1 55.9 <NA>
## 2 0.7 19.14
## 3 1.4 17.6
## 4 <NA> <NA>
## 5 <NA> <NA>
## 6 51.1 38.8
## 7 <NA> <NA>
## 8 0.4 25.51052632
## 9 0.2 21.78
## 10 <NA> <NA>
## 11 <NA> 11.88571429
## 12 <NA> 11.41578947
## 13 <NA> 10.83333333
## 14 <NA> <NA>
## 15 <NA> <NA>
## 16 24.6 34.825
## 17 2.5 <NA>
## 18 <NA> 13.68888889
## 19 <NA> 10.57368421
## 20 4.3 <NA>
## 21 66.8 46.4
## 22 <NA> <NA>
## 23 <NA> 26.04
## 24 9.1 27.77058824
## 25 2.2 16.4
## 26 17.2 37.15
## 27 3.8 29.66315789
## 28 <NA> <NA>
## 29 <NA> <NA>
## 30 <NA> 18.5
## 31 <NA> 47.88
## 32 75.1 66.33333333
## 33 <NA> 30.8
## 34 16.6 <NA>
## 35 43.6 40.3
## 36 <NA> 14.62941176
## 37 <NA> <NA>
## 38 80.4 63.8
## 39 84.2 47.05
## 40 <NA> <NA>
## 41 <NA> 21.72222222
## 42 3.9 27.74285714
## 43 4.8 30.58888889
## 44 37.3 35.2
## 45 64.5 80.03333333
## 46 24.3 47.55
## 47 0.5 24.85714286
## 48 46.1 38.85
## 49 <NA> 16.41538462
## 50 0.7 <NA>
## 51 <NA> <NA>
## 52 <NA> 9.511111111
## 53 <NA> 7.955555556
## 54 <NA> 7.194736842
## 55 <NA> 36.7
## 56 <NA> <NA>
## 57 2.3 26.74
## 58 2.1 28.74285714
## 59 5.2 21.55714286
## 60 7.9 27.58947368
## 61 <NA> <NA>
## 62 <NA> <NA>
## 63 <NA> 13.16842105
## 64 19.2 47.9
## 65 68.7 42.83333333
## 66 <NA> <NA>
## 67 1.5 21.06666667
## 68 <NA> 7.836842105
## 69 <NA> 11.64210526
## 70 <NA> <NA>
## 71 15.6 26.75
## 72 41.7 40.95
## 73 0.3 30.235
## 74 <NA> 10.93888889
## 75 24.6 42.23333333
## 76 <NA> <NA>
## 77 <NA> 17.74736842
## 78 <NA> <NA>
## 79 <NA> 18.8
## 80 <NA> <NA>
## 81 28.9 33
## 82 66.2 41.73333333
## 83 64.4 48.86666667
## 84 1.8 <NA>
## 85 41.3 41.8
## 86 12 35.91764706
## 87 <NA> <NA>
## 88 <NA> 14.61111111
## 89 <NA> 7.593333333
## 90 16.4 37.51
## 91 3.6 33.05714286
## 92 <NA> 21.06
## 93 8.6 15.75
## 94 <NA> 11.34736842
## 95 <NA> <NA>
## 96 <NA> 23.98947368
## 97 <NA> 16.28421053
## 98 2.8 20.73333333
## 99 <NA> 12.9
## 100 0.4 14.36666667
## 101 0.5 15.22105263
## 102 37.5 44.625
## 103 19.8 27.4
## 104 <NA> <NA>
## 105 <NA> 15.24545455
## 106 <NA> 20.35
## 107 <NA> <NA>
## 108 0.4 24.65
## 109 23.1 33.43333333
## 110 <NA> 16.88333333
## 111 <NA> 13.2
## 112 19.6 43.1
## 113 52.3 44.96666667
## 114 2 <NA>
## 115 <NA> <NA>
## 116 <NA> 16.84444444
## 117 <NA> 12.83157895
## 118 <NA> <NA>
## 119 68.4 78.36666667
## 120 49.9 68.275
## 121 <NA> 22.9
## 122 0.8 17.96666667
## 123 68.3 44.5
## 124 <NA> 11.69333333
## 125 <NA> 20.3
## 126 58.4 31.9
## 127 <NA> 17.76666667
## 128 4.1 26.89090909
## 129 <NA> 33.8
## 130 0.9 18.17222222
## 131 <NA> <NA>
## 132 7.3 21.5125
## 133 1.2 24.32
## 134 6.4 25.05
## 135 61.9 69.96666667
## 136 38.3 25.85
## 137 40.9 41
## 138 <NA> 24.9
## 139 17.5 33.2
## 140 <NA> 9.3
## 141 <NA> <NA>
## 142 <NA> <NA>
## 143 16.5 30.8
## 144 91 63.41666667
## 145 33 45.16
## 146 0.4 26.05454545
## 147 <NA> <NA>
## 148 <NA> 8.311764706
## 149 <NA> <NA>
## 150 38.3 32.7875
## 151 <NA> <NA>
## 152 <NA> 27.96315789
## 153 56.6 46.9
## 154 4.5 26.99
## 155 6.6 29.825
## 156 5.8 32.08571429
## 157 <NA> 15.54444444
## 158 <NA> 14.76315789
## 159 <NA> <NA>
## 160 <NA> 17.5
## 161 <NA> 24.69375
## 162 <NA> 17.2
## 163 48.8 58.15
## 164 6.3 23.95
## 165 <NA> <NA>
## 166 11.7 37.3
## 167 <NA> <NA>
## 168 50.8 39.775
## 169 0.1 23.07
## 170 0.9 17.2
## 171 59.2 49.83333333
## 172 <NA> <NA>
## 173 <NA> <NA>
## 174 <NA> 12.17222222
## 175 <NA> 8.127777778
## 176 <NA> 46.25
## 177 <NA> <NA>
## 178 6.3 39.075
## 179 <NA> 55.8
## 180 <NA> 17.29473684
## 181 2.9 25.36
## 182 <NA> <NA>
## 183 1.9 19.4
## 184 <NA> <NA>
## 185 <NA> <NA>
## 186 52.3 35.9
## 187 2.9 21.7
## 188 <NA> 10.42631579
## 189 <NA> 11.83125
## 190 <NA> 29.625
## 191 7.4 34.72
## 192 57.1 51.83333333
## 193 0.6 20.65882353
## 194 48.3 44.2
## 195 37.6 51.28
## 196 0.9 20.83333333
## 197 0.6 <NA>
## 198 0.8 20.575
## 199 <NA> 21.94
## 200 0.2 <NA>
## 201 <NA> <NA>
## 202 2.1 27
## 203 57.2 50
## 204 0.2 13.00555556
## 205 <NA> 16.25
## 206 <NA> 13.89473684
## 207 <NA> 19.14
## 208 <NA> 23.065
## 209 1.7 50.2
## 210 <NA> 33.25
## 211 <NA> 33.075
## 212 1.9 25.82
## 213 <NA> <NA>
## 214 0.6 18.9625
## 215 48.5 34.45
## 216 47.9 63.06
## 217 25.8 43
## Population_Rural Prevalence.of.HIV.total....of.population.ages.15.49.
## 1 23970533.76 0.1
## 2 1308266.095 0.1
## 3 11725015.71 0.1
## 4 6643.761905 <NA>
## 5 8656.571429 <NA>
## 6 9902981.571 1.745
## 7 64215.28571 <NA>
## 8 3721265.857 0.39
## 9 1109743.905 0.17
## 10 58498.09524 <NA>
## 11 3335954.333 0.1
## 12 3575886.667 <NA>
## 13 4244327.667 0.1
## 14 64618.66667 1.39
## 15 137562.2857 <NA>
## 16 104651411.4 0.1
## 17 188545.8095 1.07
## 18 2263897.381 0.28
## 19 245864.381 0.19
## 20 185189.8571 1.37
## 21 5928815.381 1.14
## 22 0 <NA>
## 23 452387.1429 0.2
## 24 3448099.333 0.315
## 25 1959002.429 <NA>
## 26 742751.0476 21.19
## 27 29241718.1 0.485
## 28 16551.71429 <NA>
## 29 97432.71429 <NA>
## 30 1939507.714 0.1
## 31 13033995.48 1.015
## 32 9068831.476 1.51
## 33 187728.2857 0.83
## 34 11866979.38 0.805
## 35 10017097.76 3.885
## 36 6678381.238 0.2
## 37 0 <NA>
## 38 2786369.048 4.61
## 39 10749496.38 1.46
## 40 110929.1905 <NA>
## 41 2262842.333 0.375
## 42 639794213.2 <NA>
## 43 9690497.524 0.415
## 44 501018.4286 0.1
## 45 44703671.81 0.99
## 46 1681629.238 3.69
## 47 1193027.286 0.35
## 48 12496495.76 3.29
## 49 1831440.238 0.1
## 50 2612732.048 0.39
## 51 15477.14286 <NA>
## 52 385279.3333 0.1
## 53 2763000.952 0.1
## 54 723705.4286 0.1
## 55 221745 <NA>
## 56 21287.42857 <NA>
## 57 2406635.905 1.13
## 58 5813075.381 0.37
## 59 54663154.67 0.1
## 60 1962566.857 0.515
## 61 424576.7143 6.015
## 62 1897286.476 0.8
## 63 418074.7619 0.705
## 64 873650.9048 27.91
## 65 80382410.52 1.33
## 66 29269.66667 <NA>
## 67 419334.1905 0.155
## 68 839968.2381 <NA>
## 69 13662287.43 0.34
## 70 107536.7143 <NA>
## 71 243538.9524 4
## 72 879443.0952 1.805
## 73 1637912.762 0.195
## 74 18959086.95 0.1
## 75 12832511.57 1.925
## 76 0 <NA>
## 77 2470947.524 0.155
## 78 8313 <NA>
## 79 72341.38095 <NA>
## 80 9323 <NA>
## 81 7731157.238 0.295
## 82 7387051.381 1.635
## 83 996546.7143 3.565
## 84 564070.5238 1.38
## 85 5024909.476 1.765
## 86 4062541.524 0.39
## 87 0 <NA>
## 88 3012544.476 <NA>
## 89 21280.42857 0.1
## 90 871947246.9 0.305
## 91 122094823.1 0.315
## 92 22222793.43 0.1
## 93 10577102.62 0.1
## 94 1752304.762 0.163157895
## 95 39456.57143 <NA>
## 96 639919.6667 <NA>
## 97 18194522.38 0.255
## 98 1257545.714 1.37
## 99 12782392.14 <NA>
## 100 1020082.143 0.1
## 101 7600387.19 <NA>
## 102 33378277.9 5.305
## 103 56149.38095 <NA>
## 104 9850210.667 <NA>
## 105 9271045.048 <NA>
## 106 <NA> <NA>
## 107 0 0.1
## 108 3780829.024 0.155
## 109 4499112.286 0.285
## 110 654232.5238 0.495
## 111 658810 0.1
## 112 1541649.381 23.095
## 113 2202226 1.475
## 114 1359240.524 0.135
## 115 31575.14286 <NA>
## 116 990221.7143 0.145
## 117 57378.28571 0.2
## 118 0 <NA>
## 119 15880585.38 0.21
## 120 13649663.71 10.04
## 121 8155564.857 0.385
## 122 251392.4762 0.1
## 123 10786790.33 1.225
## 124 25668.19048 0.155
## 125 12290.47619 <NA>
## 126 1889007.952 0.435
## 127 734296.0476 <NA>
## 128 24882013.57 0.35
## 129 85096.09524 <NA>
## 130 1593555.762 0.645
## 131 0 <NA>
## 132 968801.381 0.1
## 133 215710.8571 0.1
## 134 13511740.57 0.1
## 135 16867680.33 11.71
## 136 35410101.57 0.87
## 137 1266037.905 12.87
## 138 0 <NA>
## 139 22682431.05 0.17
## 140 1968441.048 0.189473684
## 141 85044.2381 <NA>
## 142 619458.1429 0.1
## 143 2536489.048 0.265
## 144 15776179.43 0.355
## 145 96553618.29 <NA>
## 146 814017 0.1
## 147 4736.952381 <NA>
## 148 982412.0476 <NA>
## 149 695857.5714 0.1
## 150 133931919.8 0.12
## 151 4332.666667 <NA>
## 152 1288529.286 0.885
## 153 7193802.048 0.77
## 154 2396036.952 0.34
## 155 6979358.476 0.37
## 156 54163801.9 0.13
## 157 14904158.24 0.1
## 158 3944734.857 0.585
## 159 219506.5714 <NA>
## 160 22695.85714 0.1
## 161 9285937.524 0.1
## 162 37360822.33 <NA>
## 163 9247334.19 3.38
## 164 160686.7143 <NA>
## 165 1222.238095 <NA>
## 166 60974.7619 0.96
## 167 4554874.952 0.1
## 168 7639261.143 0.505
## 169 3192188.714 0.1
## 170 41945.57143 <NA>
## 171 4019073.238 1.58
## 172 0 <NA>
## 173 0 <NA>
## 174 2463718.143 0.1
## 175 956938.2857 0.1
## 176 476740.9524 <NA>
## 177 7845793.905 <NA>
## 178 19841596.29 17.58
## 179 7971479.714 2.28
## 180 9594185.619 0.305
## 181 17180006.57 0.1
## 182 32183.71429 <NA>
## 183 138475.619 <NA>
## 184 <NA> <NA>
## 185 53796.7619 <NA>
## 186 25675997.33 0.1
## 187 191603.1905 1.46
## 188 1335513.952 <NA>
## 189 2125101.381 0.2
## 190 9470445.714 0.1
## 191 6096993.476 0.135
## 192 34562357 5.425
## 193 37686173.38 1.55
## 194 818006 0.135
## 195 4423771.238 2.6
## 196 81435.2381 <NA>
## 197 617704.6667 1.285
## 198 3607941.667 0.1
## 199 20945003.9 <NA>
## 200 3000828.143 <NA>
## 201 2868.380952 <NA>
## 202 4431.857143 <NA>
## 203 28365603.48 6.07
## 204 14228444.71 <NA>
## 205 1106759.762 0.1
## 206 11446102.52 <NA>
## 207 58560037.67 0.4
## 208 179461.1905 0.49
## 209 14974166.1 <NA>
## 210 193327.5714 <NA>
## 211 3400355.81 0.525
## 212 60699087.57 0.36
## 213 5445.142857 <NA>
## 214 1017335.381 <NA>
## 215 19513585.71 0.1
## 216 9087849.429 12.95
## 217 9438587.714 15.025
## Incidence.of.tuberculosis..per.100.000.people..
## 1 187.8571429
## 2 17.19047619
## 3 69.47619048
## 4 6.771428571
## 5 8.157142857
## 6 361.3333333
## 7 4.452380952
## 8 27.71428571
## 9 51.61904762
## 10 9.3
## 11 6.361904762
## 12 8.557142857
## 13 80.14285714
## 14 14.3952381
## 15 21.57142857
## 16 221
## 17 2.012857143
## 18 52.23809524
## 19 9.704761905
## 20 32.0952381
## 21 63.23809524
## 22 3.757142857
## 23 180.7142857
## 24 127.952381
## 25 44.76190476
## 26 453
## 27 46.28571429
## 28 1.85047619
## 29 63.71428571
## 30 33.66666667
## 31 54.47619048
## 32 139.6666667
## 33 67.66666667
## 34 399.1904762
## 35 234.3809524
## 36 5.414285714
## 37 5.019047619
## 38 540
## 39 145.5238095
## 40 <NA>
## 41 16.61904762
## 42 71.66666667
## 43 34.23809524
## 44 34.85714286
## 45 323.8571429
## 46 388.9047619
## 47 12.46190476
## 48 186.0952381
## 49 16.51428571
## 50 7.495238095
## 51 3.388571429
## 52 4.876190476
## 53 6.866666667
## 54 6.166666667
## 55 366.8095238
## 56 11.17142857
## 57 52.66666667
## 58 44.9047619
## 59 15.9047619
## 60 46.28571429
## 61 240.4761905
## 62 140.5238095
## 63 24.06666667
## 64 935.1904762
## 65 229.6666667
## 66 <NA>
## 67 40.38095238
## 68 5.733333333
## 69 9.095238095
## 70 22.52380952
## 71 547.3809524
## 72 172
## 73 115.6666667
## 74 6.352380952
## 75 169.3333333
## 76 <NA>
## 77 5.157142857
## 78 150.2857143
## 79 3.020952381
## 80 46
## 81 27.71428571
## 82 186.5238095
## 83 359.7142857
## 84 95.0952381
## 85 211.2857143
## 86 42.71428571
## 87 74.80952381
## 88 12.95714286
## 89 3.604761905
## 90 252
## 91 341.9047619
## 92 15.57142857
## 93 38.28571429
## 94 8.5
## 95 <NA>
## 96 4.866666667
## 97 6.595238095
## 98 4.314285714
## 99 18.03333333
## 100 5.8
## 101 117.2857143
## 102 439.047619
## 103 410.1428571
## 104 513
## 105 76.76190476
## 106 <NA>
## 107 25.27142857
## 108 127.5714286
## 109 202.4761905
## 110 45.38095238
## 111 11.50952381
## 112 931.9047619
## 113 294.6666667
## 114 44.52380952
## 115 <NA>
## 116 57.52380952
## 117 7.528571429
## 118 72.66666667
## 119 242
## 120 261.2857143
## 121 85.71428571
## 122 39.33333333
## 123 59.57142857
## 124 11.65238095
## 125 424.4761905
## 126 124.7619048
## 127 12.33333333
## 128 22.57142857
## 129 145.1904762
## 130 109.047619
## 131 1.190952381
## 132 431.952381
## 133 19.21052632
## 134 98.33333333
## 135 355.6190476
## 136 454
## 137 792.7619048
## 138 97.33333333
## 139 290.5714286
## 140 6.147619048
## 141 18.06190476
## 142 7.852380952
## 143 48.66666667
## 144 107
## 145 219
## 146 21.52380952
## 147 80.19047619
## 148 5.976190476
## 149 10.95238095
## 150 272.6190476
## 151 77.38095238
## 152 52.76190476
## 153 432
## 154 44.42857143
## 155 135.1428571
## 156 552.2380952
## 157 19.4047619
## 158 26.04761905
## 159 1.975238095
## 160 36.04761905
## 161 93.95238095
## 162 70.0952381
## 163 76
## 164 11.07619048
## 165 0.195238095
## 166 122.8571429
## 167 14.18571429
## 168 127.2380952
## 169 24.31578947
## 170 16.89047619
## 171 306.6190476
## 172 43
## 173 6.607142857
## 174 8.5
## 175 8.685714286
## 176 77.19047619
## 177 273.1904762
## 178 954.4285714
## 179 227
## 180 13.86190476
## 181 64.9047619
## 182 4.40952381
## 183 6.642857143
## 184 <NA>
## 185 10.55714286
## 186 99.04761905
## 187 31
## 188 6.071428571
## 189 6.714285714
## 190 22.19047619
## 191 121.5714286
## 192 356.1904762
## 193 178
## 194 498
## 195 56.52380952
## 196 12.38095238
## 197 18.0952381
## 198 32.38095238
## 199 22.19047619
## 200 66.61904762
## 201 18.05238095
## 202 219.7142857
## 203 210.7142857
## 204 102.7619048
## 205 1.448571429
## 206 11.63333333
## 207 3.833333333
## 208 28.85714286
## 209 87.80952381
## 210 61.33333333
## 211 35.04761905
## 212 216.3809524
## 213 <NA>
## 214 0.923809524
## 215 56.85714286
## 216 445.4285714
## 217 356.4761905
## Diabetes.prevalence....of.population.ages.20.to.79. Hospital_beds_1000ppl
## 1 9.25 0.417894737
## 2 6.5 2.953888889
## 3 7.05 1.699230769
## 4 20.3 <NA>
## 5 7.55 2.72
## 6 3.75 0.775
## 7 12.1 2.782666667
## 8 5.45 4.314615385
## 9 7.05 4.359444444
## 10 8.35 <NA>
## 11 6.5 3.837692308
## 12 5.6 7.556111111
## 13 4.2 5.260588235
## 14 10.45 2.885555556
## 15 15.4 1.972941176
## 16 12.35 0.617142857
## 17 13.2 6.4875
## 18 6.8 10.21235294
## 19 4.2 5.994210526
## 20 15.75 1.119999997
## 21 1.55 0.458461538
## 22 12.5 <NA>
## 23 8.05 1.777894737
## 24 6.05 1.136842105
## 25 8.3 2.207058824
## 26 8 2.284210526
## 27 9.45 2.428888889
## 28 8.65 <NA>
## 29 10.25 3.055789474
## 30 7.05 6.745555556
## 31 2.55 0.5
## 32 4.6 0.79
## 33 3.75 1.989333333
## 34 5.1 0.685555556
## 35 5.8 1.816666667
## 36 8.05 2.826111111
## 37 12.5 <NA>
## 38 4.5 1.1
## 39 4.85 0.446666667
## 40 5.6 <NA>
## 41 10.15 2.194210526
## 42 9.7 3.098333333
## 43 9.05 1.485333335
## 44 10.05 2.18
## 45 4.45 0.95
## 46 5.5 1.6
## 47 9.2 1.217894737
## 48 3.5 0.4
## 49 5 5.59
## 50 8.5 4.390526316
## 51 11.7 <NA>
## 52 8.95 2.745882353
## 53 6.2 7.03
## 54 5.45 3.161111111
## 55 6.85 1.406153846
## 56 10.3 3.661666667
## 57 9.3 1.551249994
## 58 5.5 1.473157895
## 59 18.75 1.526111111
## 60 7.85 1.029411765
## 61 4.85 2.066666667
## 62 5.05 1.232777778
## 63 6.75 5.132222222
## 64 3.85 2.08
## 65 4.2 1.255
## 66 3.8 <NA>
## 67 14.3 1.982307692
## 68 5.95 5.321666667
## 69 5.35 6.775
## 70 16.9 <NA>
## 71 7.9 2.89
## 72 1.95 1.034444444
## 73 4.25 3.842777778
## 74 6.1 8.248888889
## 75 3.8 0.75125
## 76 <NA> <NA>
## 77 5.75 4.514705882
## 78 3.3 <NA>
## 79 10.6 3.367692293
## 80 13.85 <NA>
## 81 11.2 0.565
## 82 3.25 0.3
## 83 2.55 0.85
## 84 14.2 2.026428571
## 85 7.75 3.23
## 86 5.9 0.603157895
## 87 7.7 <NA>
## 88 6.5 7.214210526
## 89 4.35 3.42394
## 90 9.3 1.640555554
## 91 7.85 0.902631579
## 92 10.1 1.710588235
## 93 9.9 1.223684211
## 94 4.1 3.663888889
## 95 6.3 1.646139087
## 96 7.95 3.343157895
## 97 5.75 3.536111111
## 98 13.4 1.727777776
## 99 7.15 13.46055556
## 100 13.75 1.581052632
## 101 7.15 6.617777778
## 102 4.55 1.3825
## 103 23.7 1.557142857
## 104 8.5 13.75
## 105 7.15 9.69
## 106 <NA> <NA>
## 107 22.8 2.285555556
## 108 6.45 4.787368421
## 109 4.75 1.244736842
## 110 6.85 6.442777778
## 111 13.8 3.122105263
## 112 4 1.3
## 113 2.7 1.072
## 114 11.25 3.573684211
## 115 5.4 <NA>
## 116 6.8 6.935
## 117 5.2 5.164736842
## 118 7.5 <NA>
## 119 4.65 0.336666667
## 120 6.45 1.2
## 121 15.55 1.911052632
## 122 9.2 4.090769231
## 123 2 0.273333333
## 124 7.3 5.439444444
## 125 22.4 2.715
## 126 3.2 0.4
## 127 18.7 3.465789474
## 128 16.25 1.025263158
## 129 15.65 3.28
## 130 4.15 5.84
## 131 5.9 17.30444444
## 132 7 7.416315789
## 133 8.4 3.917777778
## 134 7.95 0.888823529
## 135 3.2 0.786875
## 136 7.1 0.881666667
## 137 7.25 3
## 138 21.9 3.96
## 139 6.15 0.255789474
## 140 4.9 3.829444444
## 141 16 <NA>
## 142 7.4 2.684615385
## 143 10.15 0.893157893
## 144 4.7 0.294545455
## 145 4.2 0.5
## 146 6.9 4.396111111
## 147 23.4 <NA>
## 148 4.15 4.223888889
## 149 12.15 1.452222222
## 150 19.35 0.474705882
## 151 14.05 5.03
## 152 8.8 2.085
## 153 12.15 0.17
## 154 7 0.852222222
## 155 5.4 1.50235294
## 156 8.4 1.028947368
## 157 7.9 6.467222222
## 158 9.35 3.435
## 159 13.15 <NA>
## 160 19.65 1.38
## 161 7.1 6.811666667
## 162 7.65 8.384736842
## 163 4.8 0.891875
## 164 8.45 1.326666667
## 165 6.45 3.415
## 166 5.5 3.0475
## 167 19.15 2.084210526
## 168 3.15 0.51
## 169 8.4 5.256875
## 170 10.3 3.692222222
## 171 2.65 0.4
## 172 10.55 2.428888889
## 173 <NA> <NA>
## 174 5.75 6.226111111
## 175 6.65 4.573333333
## 176 17.55 1.665
## 177 5.4 0.77
## 178 8.9 2.518
## 179 6.5 <NA>
## 180 8.3 3.098947368
## 181 9.4 3.578888889
## 182 12.3 5.088235294
## 183 10.05 2.019090909
## 184 <NA> <NA>
## 185 8.4 3.574285714
## 186 13.75 0.731666667
## 187 10.8 2.948
## 188 4.6 2.594444444
## 189 5.25 5.009473684
## 190 12.4 1.450526316
## 191 6.45 4.978421053
## 192 7.55 0.704
## 193 8.6 2.085263158
## 194 8.05 5.9
## 195 2.7 0.67
## 196 13.85 2.426666667
## 197 12.7 2.428823529
## 198 9.55 1.821052632
## 199 11.2 2.646666667
## 200 4.7 4.158947368
## 201 <NA> <NA>
## 202 19.75 <NA>
## 203 3.7 0.74
## 204 4.25 8.138888889
## 205 17.6 1.497894737
## 206 5.75 2.978947368
## 207 10.05 2.963333333
## 208 7.35 2.049999999
## 209 6.7 4.821578947
## 210 15.85 3.16
## 211 9.9 0.961818182
## 212 4.65 2.014666667
## 213 12.25 <NA>
## 214 9.2 <NA>
## 215 7.5 0.638666667
## 216 8.35 1.966666667
## 217 5.95 2.081666667
## Physicians_1000ppl Death.rate..crude..per.1.000.people...SP.DYN.CDRT.IN.
## 1 0.245933333 8.133000
## 2 1.387615385 7.823500
## 3 1.4515 4.725300
## 4 <NA> 5.340000
## 5 3.294 3.616667
## 6 0.154 10.626850
## 7 1.731333333 5.845500
## 8 3.860957143 7.739100
## 9 2.829428571 9.877350
## 10 <NA> 7.921250
## 11 3.297555556 6.575000
## 12 4.876736842 9.420000
## 13 3.411647059 6.085000
## 14 2.220333333 6.670600
## 15 0.981928571 2.130050
## 16 0.434631579 6.006450
## 17 2.1825 8.984800
## 18 4.598777778 13.959350
## 19 3.151789474 9.760000
## 20 1.0305 5.042150
## 21 0.056285714 10.263200
## 22 <NA> 7.720000
## 23 0.335846154 6.894550
## 24 0.7564 7.992650
## 25 1.696538462 11.019600
## 26 0.3445 9.099650
## 27 2.0644 6.551050
## 28 <NA> 5.629350
## 29 1.414071429 4.132350
## 30 3.736428571 15.585000
## 31 0.056076923 10.807900
## 32 0.059 9.837600
## 33 0.601888889 5.585650
## 34 0.215428571 6.324000
## 35 0.101777778 10.213850
## 36 2.311842857 7.410000
## 37 <NA> 3.410000
## 38 0.0597 13.600600
## 39 0.046 14.571350
## 40 <NA> 8.331753
## 41 1.774789474 6.050000
## 42 1.631055556 6.990000
## 43 1.821157895 5.417650
## 44 0.224333333 9.121100
## 45 0.16175 11.171200
## 46 0.141428571 8.103900
## 47 2.549 4.927050
## 48 0.156428571 10.701700
## 49 2.899058824 12.494750
## 50 7.0593125 9.025000
## 51 <NA> 8.700850
## 52 2.417666667 6.646300
## 53 3.852321053 10.645000
## 54 3.724117647 9.700000
## 55 0.187 9.133900
## 56 1.126 10.411300
## 57 1.35725 5.987550
## 58 1.888857143 5.056000
## 59 0.714714286 5.973950
## 60 2.20644 7.090300
## 61 0.2675 10.443250
## 62 0.054444444 7.389550
## 63 3.327833333 12.265000
## 64 0.152875 13.915500
## 65 0.040555556 8.744050
## 66 <NA> 8.065000
## 67 0.5638 7.494500
## 68 3.589111111 9.640000
## 69 3.335 8.840000
## 70 <NA> 3.120200
## 71 0.457828571 8.016900
## 72 0.098363636 8.590250
## 73 5.105 12.652900
## 74 3.908210526 10.910000
## 75 0.106333333 8.250100
## 76 <NA> 8.095750
## 77 5.8465625 10.740000
## 78 <NA> 8.445000
## 79 0.94 7.507550
## 80 <NA> 5.484900
## 81 0.70925 5.405700
## 82 0.127333333 11.299600
## 83 0.134333333 10.615700
## 84 1.028 7.689750
## 85 0.174 9.096250
## 86 0.4044 4.510100
## 87 <NA> 6.215000
## 88 3.144210526 13.410000
## 89 3.676294118 6.390000
## 90 0.7025 7.456900
## 91 0.3476 7.772600
## 92 1.064 5.107500
## 93 0.737 5.617600
## 94 3.555947368 6.505000
## 95 1.449 10.360800
## 96 3.545421053 5.340000
## 97 3.861263158 10.395000
## 98 0.493375 6.827800
## 99 2.294444444 9.975000
## 100 2.317384615 3.513800
## 101 3.798833333 8.660500
## 102 0.183888889 7.942400
## 103 0.23775 6.713200
## 104 3.449 8.670150
## 105 2.046833333 5.440000
## 106 <NA> 5.199150
## 107 2.118142857 2.055450
## 108 2.333538462 6.245000
## 109 0.342285714 7.785150
## 110 3.139277778 14.775000
## 111 2.350636364 4.802900
## 112 0.189 16.861100
## 113 0.029666667 9.997900
## 114 1.865166667 5.078050
## 115 <NA> 6.665000
## 116 4.101 13.960000
## 117 2.730126667 7.370000
## 118 <NA> 3.794650
## 119 0.166384615 7.565200
## 120 0.039125 10.274050
## 121 1.319277778 4.992200
## 122 1.621111111 3.131350
## 123 0.093933333 11.261350
## 124 2.8113 7.705000
## 125 0.564 6.965100
## 126 0.159857143 8.031800
## 127 1.575888889 7.875000
## 128 2.115777778 6.148300
## 129 0.584533333 5.061950
## 130 2.874875 13.412700
## 131 7.36225 9.500000
## 132 3.0641 6.366800
## 133 2.233210526 10.185000
## 134 0.672666667 5.925650
## 135 0.060333333 10.858550
## 136 0.559846154 9.262000
## 137 0.47375 10.599650
## 138 1.103 7.555400
## 139 0.677125 7.067850
## 140 3.181444444 8.630000
## 141 <NA> 5.430000
## 142 2.789894737 6.802500
## 143 0.727 4.893000
## 144 0.032833333 10.366100
## 145 0.359909091 14.311350
## 146 2.512916667 10.022668
## 147 <NA> 4.150000
## 148 4.257052632 8.295000
## 149 1.939277778 2.675700
## 150 0.821066667 7.452200
## 151 1.5242 8.675000
## 152 1.479470588 4.937550
## 153 0.0572 6.946150
## 154 1.785125 5.737350
## 155 1.337166667 6.563250
## 156 1.035153846 5.705450
## 157 2.3219375 10.495000
## 158 4.306222222 10.475000
## 159 <NA> 8.438850
## 160 2.800875 1.359200
## 161 2.455833333 13.160000
## 162 4.420333333 14.055000
## 163 0.080684615 7.929500
## 164 0.377714286 5.315250
## 165 5.16675 7.378947
## 166 0.438666667 6.832600
## 167 2.214142857 2.679550
## 168 0.1206625 6.993150
## 169 2.878142857 14.748500
## 170 1.341583333 7.810000
## 171 0.038916667 12.435300
## 172 1.891235294 4.775000
## 173 <NA> 4.443850
## 174 3.428052632 10.090000
## 175 2.648444444 9.650000
## 176 0.173142857 5.434650
## 177 0.0255 13.531400
## 178 0.772833333 10.888750
## 179 0.04 11.944850
## 180 3.783944444 8.820000
## 181 0.797166667 7.468900
## 182 2.95 9.297600
## 183 1.048908333 7.738950
## 184 <NA> 4.058200
## 185 0.833333333 8.293200
## 186 0.334333333 7.717800
## 187 0.921166667 7.579450
## 188 4.042444444 9.515000
## 189 4.017421053 8.100000
## 190 1.400111111 5.130750
## 191 1.79 5.600300
## 192 0.046666667 8.224050
## 193 0.515666667 6.658250
## 194 0.411692308 7.771700
## 195 0.059781818 9.738600
## 196 0.734857143 6.740150
## 197 2.261818182 7.104800
## 198 1.1282 5.748400
## 199 1.687555556 5.340250
## 200 2.28925 6.592100
## 201 <NA> 6.259700
## 202 1.0668 10.039400
## 203 0.162857143 8.253700
## 204 3.233416667 15.785000
## 205 1.983277778 1.266000
## 206 2.682421053 9.280000
## 207 3.08575 8.551900
## 208 4.77125 9.598600
## 209 2.565916667 4.965000
## 210 0.144166667 5.809900
## 211 1.664 6.155400
## 212 0.723 6.307650
## 213 <NA> 7.090000
## 214 1.971 3.603900
## 215 0.303 6.143050
## 216 0.139363636 9.017250
## 217 0.1294 11.854750
## Rural.land.area..sq..km...AG.LND.TOTL.RU.K2.
## 1 636173.2292
## 2 27331.60161
## 3 2305478.23
## 4 198.2477988
## 5 409.4971338
## 6 1250999.883
## 7 396.33478
## 8 2735389.702
## 9 27342.21829
## 10 84.80002661
## 11 7650418.083
## 12 80030.88661
## 13 82218.65449
## 14 12279.91847
## 15 335.707418
## 16 79328.8629
## 17 248.5273247
## 18 201456.0045
## 19 22780.52409
## 20 21708.44766
## 21 113691.0639
## 22 20.30314892
## 23 39040.72891
## 24 1059511.349
## 25 49134.66048
## 26 572887.2314
## 27 8385496.484
## 28 147.7165994
## 29 5571.881756
## 30 108913.9424
## 31 273918.352
## 32 24150.62687
## 33 4043.854845
## 34 176180.6789
## 35 463837.3154
## 36 9197138.473
## 37 229.4268204
## 38 622165.0808
## 39 1272923.572
## 40 <NA>
## 41 723594.5312
## 42 8723723.06
## 43 1127197.644
## 44 1534.767163
## 45 2295898.507
## 46 340340.844
## 47 49804.71155
## 48 318023.6312
## 49 54777.08598
## 50 106664.7578
## 51 316.322036
## 52 8747.627334
## 53 73926.22029
## 54 40365.627
## 55 21571.7749
## 56 735.8386756
## 57 45389.62872
## 58 253213.9217
## 59 971206.1986
## 60 18676.37202
## 61 26898.23531
## 62 120001.0006
## 63 42656.57242
## 64 17073.22018
## 65 1124616.275
## 66 1385.837107
## 67 18883.78571
## 68 302765.4983
## 69 522936.0993
## 70 3984.443843
## 71 263337.7103
## 72 10197.83849
## 73 68068.72537
## 74 316383.4801
## 75 227830.2993
## 76 0.04744059
## 77 128795.9109
## 78 315961.3547
## 79 291.3748994
## 80 466.8133246
## 81 105527.1242
## 82 244205.7532
## 83 33293.4172
## 84 209904.1444
## 85 25688.0683
## 86 110411.1556
## 87 591.1958519
## 88 87565.24504
## 89 88792.01078
## 90 2956471.266
## 91 1820838.066
## 92 1601053.354
## 93 434269.6068
## 94 67698.15848
## 95 538.4579341
## 96 19195.22991
## 97 277070.5771
## 98 10006.15649
## 99 316736.2049
## 100 86673.39544
## 101 2636600.485
## 102 579896.1624
## 103 907.5566663
## 104 119025.2203
## 105 86793.3716
## 106 <NA>
## 107 16647.65026
## 108 186901.2745
## 109 228354.3777
## 110 62761.73831
## 111 8546.202041
## 112 30172.3371
## 113 95557.59073
## 114 1619018.279
## 115 117.014356
## 116 62585.34796
## 117 2247.078424
## 118 7.574165477
## 119 588612.8579
## 120 93702.75745
## 121 318197.5661
## 122 259.6535489
## 123 1251575.144
## 124 150.8971631
## 125 271.8843642
## 126 1045547.794
## 127 1523.755108
## 128 1920993.85
## 129 757.7457415
## 130 31803.71496
## 131 0.034954518
## 132 1549425.131
## 133 13146.802
## 134 409515.811
## 135 775002.468
## 136 660979.5454
## 137 827174.2883
## 138 15.05947124
## 139 140132.7835
## 140 26211.90392
## 141 18753.56673
## 142 270548.6286
## 143 118297.5466
## 144 1187981.381
## 145 882024.6559
## 146 23681.03685
## 147 477.1777027
## 148 306188.4558
## 149 309151.5535
## 150 821721.3635
## 151 480.9659618
## 152 73613.76711
## 153 462122.298
## 154 396430.5902
## 155 1281204.386
## 156 282119.5821
## 157 293194.0002
## 158 87685.7262
## 159 6516.709161
## 160 10591.24111
## 161 229905.7683
## 162 16224183.47
## 163 22337.27207
## 164 2817.356677
## 165 36.00050701
## 166 994.1657239
## 167 1908957.389
## 168 194448.5732
## 169 74452.57718
## 170 475.1735125
## 171 71910.77947
## 172 210.1625761
## 173 9.184403793
## 174 46876.44364
## 175 19401.47491
## 176 28414.49527
## 177 635102.2238
## 178 1204427.393
## 179 623624.9877
## 180 488925.7056
## 181 61386.1325
## 182 253.8698195
## 183 544.6162965
## 184 28.0700068
## 185 329.1274058
## 186 1859899.893
## 187 145003.1468
## 188 411134.3059
## 189 34763.58127
## 190 180422.9231
## 191 134045.209
## 192 880726.3952
## 193 491891.911
## 194 14909.69391
## 195 56198.45264
## 196 701.7861313
## 197 4364.437481
## 198 151132.5215
## 199 754924.702
## 200 463257.5154
## 201 910.9274136
## 202 39.09779159
## 203 201994.6724
## 204 569709.5062
## 205 77233.34084
## 206 218902.7593
## 207 8903098.427
## 208 173706.5677
## 209 412602.1407
## 210 12337.56625
## 211 899029.2577
## 212 297626.6621
## 213 273.7444318
## 214 4390.870891
## 215 452881.3089
## 216 740140.2194
## 217 386236.172
## Surface.area..sq..km...AG.SRF.TOTL.K2.
## 1 652860
## 2 28750
## 3 2381740.4
## 4 200
## 5 470
## 6 1246700
## 7 440
## 8 2780400
## 9 29740
## 10 180
## 11 7741220
## 12 83879
## 13 86600
## 14 13880
## 15 765.25
## 16 148088.85
## 17 430
## 18 207603.5
## 19 30568.2
## 20 22970
## 21 114760
## 22 2172
## 23 38477.15
## 24 1098580
## 25 51210
## 26 581730
## 27 8515502.389
## 28 150
## 29 5770
## 30 110999.5
## 31 274216
## 32 27830
## 33 4030
## 34 181040
## 35 475440
## 36 15639891
## 37 264
## 38 622980
## 39 1284000
## 40 198
## 41 756307.4
## 42 9562910.85
## 43 1141580.008
## 44 1861
## 45 2344860
## 46 342000
## 47 51100
## 48 322460
## 49 72331.25
## 50 109883.5
## 51 444
## 52 9250.2
## 53 78870.178
## 54 42920
## 55 23200
## 56 750
## 57 68303.8926
## 58 256370
## 59 1001450
## 60 21040
## 61 28050
## 62 121452.219
## 63 45285
## 64 17360
## 65 1134645.505
## 66 3130.5835
## 67 18270
## 68 338395.3
## 69 549102.0305
## 70 3471
## 71 267670
## 72 11300
## 73 69700
## 74 357314
## 75 238538.6
## 76 10
## 77 131960
## 78 410450
## 79 340
## 80 540
## 81 108890
## 82 245860
## 83 36130
## 84 214970
## 85 27750
## 86 112490
## 87 1103.736842
## 88 93030
## 89 103000
## 90 3287260
## 91 1912981.514
## 92 1745150
## 93 436069.8
## 94 70280
## 95 570
## 96 22070
## 97 302068.9
## 98 10990
## 99 377951.1135
## 100 89157.6
## 101 2724901.8
## 102 580370
## 103 810
## 104 120540
## 105 100090.5
## 106 <NA>
## 107 17820
## 108 199949.6
## 109 236800
## 110 64593.4
## 111 10450
## 112 30360
## 113 111370
## 114 1759540
## 115 160
## 116 65294.5
## 117 2590
## 118 30.15789474
## 119 587193
## 120 118480
## 121 330614.95
## 122 300
## 123 1240190
## 124 320
## 125 180
## 126 1030700
## 127 2020.35
## 128 1964377.75
## 129 700
## 130 33849.5
## 131 60.3394
## 132 1564118.775
## 133 13810
## 134 446550
## 135 799380
## 136 676590
## 137 824290
## 138 20
## 139 147180
## 140 41539
## 141 18580
## 142 267710
## 143 130370
## 144 1267000
## 145 923770
## 146 25710
## 147 460
## 148 505087.6945
## 149 309500
## 150 796100
## 151 460
## 152 75375
## 153 462840
## 154 406751.3
## 155 1285220
## 156 300000
## 157 312687.5
## 158 92185.66
## 159 8870
## 160 11556
## 161 238395
## 162 17098246.5
## 163 26340
## 164 2840
## 165 60
## 166 960
## 167 2149690
## 168 196710
## 169 87854.5
## 170 460
## 171 72300
## 172 714.8
## 173 34
## 174 49032
## 175 20414
## 176 28900
## 177 637660
## 178 1219090
## 179 646883
## 180 505711.3541
## 181 65610
## 182 260
## 183 620
## 184 50
## 185 390
## 186 2160514.5
## 187 163820
## 188 488016.665
## 189 41290.7
## 190 185180
## 191 142023.2
## 192 947300
## 193 513120
## 194 14870
## 195 56790
## 196 750
## 197 5130
## 198 163610
## 199 785350
## 200 488721.8
## 201 950
## 202 30
## 203 241550
## 204 603550
## 205 98647.9
## 206 243610
## 207 9781640
## 208 176220
## 209 446031.4
## 210 12190
## 211 912050
## 212 330983.94
## 213 350
## 214 6020.263158
## 215 527970
## 216 752610
## 217 390760
# Give the awkward auto-generated column headers short, syntactic names.
# Columns are addressed by position, so this relies on the column order of
# the CSV that was read into data_some.
colnames(data_some)[c(3, 5, 6, 8, 9, 10, 13, 14, 15)] <- c(
  "HealthExp_PercGDP",
  "UNDP_PovRatio_PercPop",
  "SocLine_PovRatio_PercPop",
  "PrevHIV_PercPop1549",
  "IncTB_Per100k",
  "PrevDiab_PercPop2079",
  "Crude_Death_rate_Per1000",
  "Rural_Land_Area_Sq_Km",
  "Surface_Area_Sq_Km"
)
# Coerce the indicator columns to numeric in a single pass. Country is left
# as character; entries that cannot be parsed as numbers become NA.
data_some <- data_some %>%
  mutate(
    across(
      c(
        HealthExp_PercGDP,
        GDP_per_Capita,
        UNDP_PovRatio_PercPop,
        SocLine_PovRatio_PercPop,
        Population_Rural,
        PrevHIV_PercPop1549,
        IncTB_Per100k,
        PrevDiab_PercPop2079,
        Hospital_beds_1000ppl,
        Physicians_1000ppl,
        Rural_Land_Area_Sq_Km,
        Surface_Area_Sq_Km
      ),
      as.numeric
    )
  )
# Keep a standalone copy of the country names for later re-attachment
Country1 <- data_some$Country
Before we start doing anything, we want to understand our data a bit better. Because we are using indicators, there might be high correlations between some of the variables in our dataset. If that’s the case, we want to address it and transform or drop variables before this becomes an issue in the analysis.
# Scatterplot matrix of all indicators; the first column (Country) is left out
pairs(data_some[-1])
# Pearson correlation matrix of the indicators. Note: use = "complete.obs"
# applies listwise deletion, i.e. only rows with no missing value in any
# indicator contribute to the coefficients.
data_some_corr <- as.data.frame(
  cor(data_some[-1], method = "pearson", use = "complete.obs")
)
Looking at the correlation patterns between the numerical variables, we notice that Population Total and Population Rural are highly correlated. The Multidimensional poverty head count ratio (UNDP) is highly correlated with the Poverty head count ratio at the societal poverty line, and the Rural land area is highly correlated with the Surface area. We consider two variables “highly correlated” when their Pearson’s correlation coefficient is above 0.80.
To keep as much information as we can we:
# Remove the variables named in the preceding paragraph, leaving one
# representative of each highly correlated pair
library(dplyr)
data_some <- data_some %>%
  dplyr::select(
    -Population_Total,
    -SocLine_PovRatio_PercPop,
    -Surface_Area_Sq_Km
  )
# Repeat the correlation check on the reduced set of indicators
pairs(data_some[-1]) # scatterplot matrix without the Country column
# Pearson correlations; "complete.obs" keeps only rows without any missing value
data_some_corr <- as.data.frame(
  cor(data_some[-1], method = "pearson", use = "complete.obs")
)
Now none of the remaining correlations is as extreme.
At this stage, we want to address the missingness in the dataset. The R package MICE and the accompanying vignettes will be our tools. By exploring the missingness pattern below, we notice that the missing values mainly affect the UNDP poverty ratio and the HIV prevalence, and to a lesser extent health expenditure, hospital beds and physicians.
The code throws errors about matrix singularity; this is usually due to high collinearity (see troubleshooting). If we run the imputation adding variables one by one, we notice that it is Population Rural that causes the issue. Therefore, we run the algorithm with the chosen method on everything except Country and Population Rural, and then with the Classification and Regression Trees (CART) method with Population Rural included.
library(mice)
# Tabulate (and plot) the missing-data patterns of every column; variable
# names are rotated so they stay readable in the plot.
md.pattern(
  data_some,
  rotate.names = TRUE
)
## Country Crude_Death_rate_Per1000 Population_Rural Rural_Land_Area_Sq_Km
## 96 1 1 1 1
## 50 1 1 1 1
## 12 1 1 1 1
## 30 1 1 1 1
## 1 1 1 1 1
## 1 1 1 1 1
## 1 1 1 1 1
## 1 1 1 1 1
## 13 1 1 1 1
## 1 1 1 1 1
## 3 1 1 1 1
## 2 1 1 1 1
## 1 1 1 1 1
## 1 1 1 1 1
## 1 1 1 1 1
## 1 1 1 1 0
## 1 1 1 0 1
## 1 1 1 0 0
## 0 0 2 2
## GDP_per_Capita PrevDiab_PercPop2079 IncTB_Per100k Physicians_1000ppl
## 96 1 1 1 1
## 50 1 1 1 1
## 12 1 1 1 1
## 30 1 1 1 1
## 1 1 1 1 1
## 1 1 1 1 1
## 1 1 1 1 1
## 1 1 1 1 1
## 13 1 1 1 0
## 1 1 1 0 1
## 3 1 1 0 0
## 2 1 0 1 0
## 1 0 1 1 1
## 1 0 1 1 0
## 1 0 0 0 0
## 1 1 1 0 0
## 1 1 0 0 0
## 1 1 0 0 0
## 3 5 8 23
## Hospital_beds_1000ppl HealthExp_PercGDP PrevHIV_PercPop1549
## 96 1 1 1
## 50 1 1 1
## 12 1 1 0
## 30 1 1 0
## 1 1 0 0
## 1 0 1 1
## 1 0 1 0
## 1 0 0 0
## 13 0 0 0
## 1 1 0 0
## 3 0 0 0
## 2 0 0 0
## 1 1 0 0
## 1 0 0 0
## 1 0 0 0
## 1 0 0 0
## 1 0 0 0
## 1 0 0 0
## 26 27 70
## UNDP_PovRatio_PercPop
## 96 1 0
## 50 0 1
## 12 1 1
## 30 0 2
## 1 0 3
## 1 0 2
## 1 1 2
## 1 1 3
## 13 0 5
## 1 0 4
## 3 0 6
## 2 0 6
## 1 0 4
## 1 0 6
## 1 0 8
## 1 0 7
## 1 0 8
## 1 0 9
## 107 273
# Dry run (maxit = 0): nothing is imputed, but the returned object shows how
# mice plans to proceed.
imputed_data <- mice(data_some, maxit = 0)
## Warning: Number of logged events: 1
predM <- imputed_data$predictorMatrix # which variables predict which missing variables
meth <- imputed_data$method # imputation method chosen per variable
# Perform the imputation with Classification and Regression Trees (CART) on
# every column except Country.
# m = N returns N imputed datasets (with N > 1 the results must be pooled for
# inference); maxit = k runs k iterations.
# A fixed seed makes the imputation, and therefore the clustering built on it,
# reproducible from one run to the next.
# printFlag is spelled out in full rather than relying on partial matching of
# `print`, and FALSE is used because `F` can be reassigned.
imputed_data_1 <- mice(
  data_some[-1],
  m = 1,
  method = "cart",
  maxit = 50,
  seed = 2025,
  printFlag = FALSE
)
# Iteration trace plot for convergence diagnostics (mice's default would be
# 5 iterations; 50 are run here)
plot(imputed_data_1)
# Diagnostics of value range: strip plot comparing imputed (red) and observed
# (blue) values for the variables with missing data. Substantial overlap
# between the two suggests that the imputed values are plausible.
stripplot(imputed_data_1, pch = 20, cex = 2)
# Extract the completed data frame from the mice object (first, and only,
# imputed dataset)
imp_dataset_1 <- complete(imputed_data_1, 1)
# Population_Rural is part of data_some[-1], so it has already been imputed.
# Copying the raw column back unconditionally would overwrite the imputed
# values with the original missing ones; only re-attach it when it was
# actually left out of the imputation.
if (!"Population_Rural" %in% names(imp_dataset_1)) {
  imp_dataset_1$Population_Rural <- data_some$Population_Rural
}
We can proceed to rescale the dataset, which is good practice before clustering to avoid magnifying or shrinking distances between features simply because of the units they are measured in. There are many ways to do this; we use z-score standardisation.
# Standardise every indicator to a z-score (centre on the column mean, divide
# by the column standard deviation), round to two decimals and return a
# data frame
imp_dataset_scaled_1 <- imp_dataset_1 %>%
  scale() %>%
  round(digits = 2) %>%
  as.data.frame()
# Attach the country names again as the last column
imp_dataset_scaled_1$Country <- data_some$Country
We proceed to cluster countries with hierarchical clustering with:
We follow:
# Create the Manhattan distance matrix between countries. "Country" is an
# identifier, not a feature, so it is dropped by name rather than by a
# hard-coded column position that would silently break if columns change.
dist_scaled_1 <- dist(
  imp_dataset_scaled_1[setdiff(names(imp_dataset_scaled_1), "Country")],
  method = "manhattan"
)
# 3. Perform hierarchical clustering (Complete linkage method) and create dendrogram
hclust_complete_1 <- hclust(dist_scaled_1, method = "complete")
plot(hclust_complete_1, main = "Complete Linkage Dendrogram (CART)")
Now we need to decide where to cut the tree, using the dynamicTreeCut package. We use an automated method, with a preference for fewer rather than more clusters.
# Make sure dynamicTreeCut is available (it is installed on first use), then
# attach it
if (!requireNamespace("dynamicTreeCut", quietly = TRUE)) {
  install.packages("dynamicTreeCut")
}
library(dynamicTreeCut)
# Derive cluster memberships from the dendrogram.
# Only the dendrogram is supplied (no distance matrix), so cutreeDynamic
# cannot use its "hybrid" method and falls back to "tree", as the warning
# below reports. Supplying a distance matrix as well would keep it on "hybrid".
cut_complete_1 <- cutreeDynamic(
  hclust_complete_1,
  deepSplit = 1,
  respectSmallClusters = TRUE,
  verbose = 0
)
## Warning in cutreeDynamic(hclust_complete_1, deepSplit = 1, respectSmallClusters = TRUE, : cutreeDynamic: method "hybrid" requires a valid dissimilarity matrix "distM".
## Defaulting to method "tree".
# Redraw the dendrogram and outline k groups, with k taken as the largest
# label returned by the dynamic cut.
# NOTE(review): rect.hclust outlines the groups of a k-cluster cut of the tree,
# which need not coincide with the dynamic-cut memberships - confirm.
plot(
  hclust_complete_1,
  main = "Hierarchical Clustering Dendrogram (CART)",
  xlab = "",
  sub = ""
)
rect.hclust(
  hclust_complete_1,
  k = max(cut_complete_1),
  border = "red"
)
The subdivisions are highlighted in the graphs by rectangles.
Now we can reveal which countries fall within which cluster.
# Pair each country with its cluster label from the dynamic tree cut
clustering_complete_1 <- as.data.frame(
  cbind(cut_complete_1, imp_dataset_scaled_1$Country)
)
# writexl is attached here, although nothing is written to disk in this chunk
library(writexl)
# Imputed (unscaled) indicators with the country names as the first column
Indicators_df <- cbind(Country1 = Country1, imp_dataset_1)
# Hand-written country lists, one per cluster, and the matching rows of
# Indicators_df
names0 <- c("Australia", "Canada", "Spain", "United Kingdom", "United States")
Cluster0 <- Indicators_df[Indicators_df$Country1 %in% names0, ]
names1 <- c(
  "Bangladesh", "Cambodia", "China", "Djibouti", "India", "Indonesia",
  "Lao PDR", "Myanmar", "Nepal", "Nigeria", "Pakistan", "Thailand", "Viet Nam"
)
Cluster1 <- Indicators_df[Indicators_df$Country1 %in% names1, ]
names2 <- c("Azerbaijan", "Ecuador", "Egypt, Arab Rep.", "Iraq", "Turkiye")
Cluster2 <- Indicators_df[Indicators_df$Country1 %in% names2, ]
names3 <- c("Chile")
Cluster3 <- Indicators_df[Indicators_df$Country1 %in% names3, ]
# Compare summary statistics across the 4 clusters
# summary_stats(): collapse a data frame into a single row of statistics.
#   df   - data frame whose columns are all summarised (numeric columns
#          expected)
#   name - label stored in the added `DataFrame` column
# Returns one row with <column>_mean, <column>_sd, <column>_min and
# <column>_max for every column of df, followed by `DataFrame`.
summary_stats <- function(df, name) {
  one_row <- summarise(
    df,
    across(everything(), list(mean = mean, sd = sd, min = min, max = max))
  )
  mutate(one_row, DataFrame = name)
}
# Summarise each cluster (the Country1 column is dropped first).
# Note the labels are 1-based ("Cluster1".."Cluster4") while the underlying
# data frames are named Cluster0..Cluster3.
Cluster1_summary <- summary_stats(Cluster0[-1], "Cluster1")
Cluster2_summary <- summary_stats(Cluster1[-1], "Cluster2")
Cluster3_summary <- summary_stats(Cluster2[-1], "Cluster3")
Cluster4_summary <- summary_stats(Cluster3[-1], "Cluster4")
# Stack the four one-row summaries and put the "DataFrame" label column first
comparison <- bind_rows(
  Cluster1_summary,
  Cluster2_summary,
  Cluster3_summary,
  Cluster4_summary
) %>%
  select(DataFrame, everything())
# Show the comparison table
print(comparison)
## DataFrame HealthExp_PercGDP_mean HealthExp_PercGDP_sd HealthExp_PercGDP_min
## 1 Cluster1 10.854240 2.981532 8.864402
## 2 Cluster2 3.713824 1.179812 2.232583
## 3 Cluster3 4.598283 1.326172 3.322632
## 4 Cluster4 7.783749 NA 7.783749
## HealthExp_PercGDP_max GDP_per_Capita_mean GDP_per_Capita_sd
## 1 16.068211 45149.663 10208.683
## 2 6.688282 2307.789 1778.744
## 3 6.694435 5377.237 2710.282
## 4 7.783749 12636.477 NA
## GDP_per_Capita_min GDP_per_Capita_max UNDP_PovRatio_PercPop_mean
## 1 29285.6946 55979.989 0.38000
## 2 791.5463 6716.494 20.10769
## 3 2556.2981 9879.269 3.40000
## 4 12636.4768 12636.477 2.50000
## UNDP_PovRatio_PercPop_sd UNDP_PovRatio_PercPop_min UNDP_PovRatio_PercPop_max
## 1 0.2387467 0.1 0.7
## 2 14.9701626 0.6 43.6
## 3 3.4763487 0.3 8.6
## 4 NA 2.5 2.5
## Population_Rural_mean Population_Rural_sd Population_Rural_min
## 1 17922932 22923304 3335954
## 2 164772220 270313656 221745
## 3 19248533 20844093 4244328
## 4 2262842 NA 2262842
## Population_Rural_max PrevHIV_PercPop1549_mean PrevHIV_PercPop1549_sd
## 1 58560038 0.279000 0.1283745
## 2 871947247 1.370769 3.1391041
## 3 54663155 0.154000 0.1207477
## 4 2262842 0.375000 NA
## PrevHIV_PercPop1549_min PrevHIV_PercPop1549_max IncTB_Per100k_mean
## 1 0.100 0.400 8.220952
## 2 0.100 11.710 268.124542
## 3 0.100 0.370 40.285714
## 4 0.375 0.375 16.619048
## IncTB_Per100k_sd IncTB_Per100k_min IncTB_Per100k_max
## 1 4.302596 3.833333 13.86190
## 2 102.483110 71.666667 454.00000
## 3 25.178343 15.904762 80.14286
## 4 NA 16.619048 16.61905
## PrevDiab_PercPop2079_mean PrevDiab_PercPop2079_sd PrevDiab_PercPop2079_min
## 1 7.73 1.676529 5.75
## 2 8.15 4.108984 4.20
## 3 9.91 5.741559 4.20
## 4 10.15 NA 10.15
## PrevDiab_PercPop2079_max Hospital_beds_1000ppl_mean Hospital_beds_1000ppl_sd
## 1 10.05 3.141006 0.4012886
## 2 19.35 1.215939 0.8158868
## 3 18.75 2.426042 1.6768166
## 4 10.15 2.194211 NA
## Hospital_beds_1000ppl_min Hospital_beds_1000ppl_max Physicians_1000ppl_mean
## 1 2.8261111 3.837692 3.0323028
## 2 0.2557895 3.098333 0.5782396
## 3 1.2236842 5.260588 1.6879548
## 4 2.1942105 2.194211 1.7747895
## Physicians_1000ppl_sd Physicians_1000ppl_min Physicians_1000ppl_max
## 1 0.5656104 2.3118429 3.783944
## 2 0.3741296 0.1870000 1.631056
## 3 1.1026093 0.7147143 3.411647
## 4 NA 1.7747895 1.774789
## Crude_Death_rate_Per1000_mean Crude_Death_rate_Per1000_sd
## 1 8.127380 1.108263
## 2 7.886792 2.171297
## 3 5.614560 0.429453
## 4 6.050000 NA
## Crude_Death_rate_Per1000_min Crude_Death_rate_Per1000_max
## 1 6.57500 9.28000
## 2 6.00645 14.31135
## 3 5.05600 6.08500
## 4 6.05000 6.05000
## Rural_Land_Area_Sq_Km_mean Rural_Land_Area_Sq_Km_sd Rural_Land_Area_Sq_Km_min
## 1 5291696.7 4545825.3 218902.76
## 2 1330834.2 2371096.2 21571.77
## 3 499166.6 362899.3 82218.65
## 4 723594.5 NA 723594.53
## Rural_Land_Area_Sq_Km_max
## 1 9197138.5
## 2 8723723.1
## 3 971206.2
## 4 723594.5
# Boxplot of HealthExp_PercGDP by cluster
# Label every cluster subset so the rows can be told apart once stacked
Cluster0$Cluster <- "Cluster0"
Cluster1$Cluster <- "Cluster1"
Cluster2$Cluster <- "Cluster2"
Cluster3$Cluster <- "Cluster3"
# Keep the label and the indicator (renamed to Value) from each subset, then
# stack the pieces into one long data frame
combined_df1 <- list(Cluster0, Cluster1, Cluster2, Cluster3) %>%
  lapply(function(cl) select(cl, Cluster, Value = HealthExp_PercGDP)) %>%
  bind_rows()
# One fill colour per cluster
custom_colors <- c("#440154", "#3B528B", "#5DC863", "#FDE725")
ggplot(
  data = combined_df1,
  mapping = aes(x = Cluster, y = Value, fill = Cluster)
) +
  geom_boxplot() +
  scale_fill_manual(values = custom_colors) +
  theme_minimal() +
  labs(
    title = "Health Expenditure (% GDP)",
    x = "",
    y = ""
  )
# Alternative palettes: scale_fill_viridis_d(), scale_fill_brewer(palette = "Set3")
# Boxplot of UNDP_PovRatio_PercPop by cluster
# Label every cluster subset so the rows can be told apart once stacked
Cluster0$Cluster <- "Cluster0"
Cluster1$Cluster <- "Cluster1"
Cluster2$Cluster <- "Cluster2"
Cluster3$Cluster <- "Cluster3"
# Keep the label and the indicator (renamed to Value) from each subset, then
# stack the pieces into one long data frame
combined_df2 <- list(Cluster0, Cluster1, Cluster2, Cluster3) %>%
  lapply(function(cl) select(cl, Cluster, Value = UNDP_PovRatio_PercPop)) %>%
  bind_rows()
# One fill colour per cluster
custom_colors <- c("#440154", "#3B528B", "#5DC863", "#FDE725")
ggplot(
  data = combined_df2,
  mapping = aes(x = Cluster, y = Value, fill = Cluster)
) +
  geom_boxplot() +
  scale_fill_manual(values = custom_colors) +
  theme_minimal() +
  labs(
    title = "Multidimensional Poverty Headcount Ratio (UNDP) (%
population)",
    x = "",
    y = ""
  )
# Boxplot of Hospital_beds_1000ppl by cluster
# Label every cluster subset so the rows can be told apart once stacked
Cluster0$Cluster <- "Cluster0"
Cluster1$Cluster <- "Cluster1"
Cluster2$Cluster <- "Cluster2"
Cluster3$Cluster <- "Cluster3"
# Keep the label and the indicator (renamed to Value) from each subset, then
# stack the pieces into one long data frame
combined_df3 <- list(Cluster0, Cluster1, Cluster2, Cluster3) %>%
  lapply(function(cl) select(cl, Cluster, Value = Hospital_beds_1000ppl)) %>%
  bind_rows()
# One fill colour per cluster
custom_colors <- c("#440154", "#3B528B", "#5DC863", "#FDE725")
ggplot(
  data = combined_df3,
  mapping = aes(x = Cluster, y = Value, fill = Cluster)
) +
  geom_boxplot() +
  scale_fill_manual(values = custom_colors) +
  theme_minimal() +
  labs(
    title = "Hospital beds (per 1,000 people)",
    x = "",
    y = ""
  )
# Boxplot of Physicians_1000ppl by cluster
# Label every cluster subset so the rows can be told apart once stacked
Cluster0$Cluster <- "Cluster0"
Cluster1$Cluster <- "Cluster1"
Cluster2$Cluster <- "Cluster2"
Cluster3$Cluster <- "Cluster3"
# Keep the label and the indicator (renamed to Value) from each subset, then
# stack the pieces into one long data frame
combined_df4 <- list(Cluster0, Cluster1, Cluster2, Cluster3) %>%
  lapply(function(cl) select(cl, Cluster, Value = Physicians_1000ppl)) %>%
  bind_rows()
# One fill colour per cluster
custom_colors <- c("#440154", "#3B528B", "#5DC863", "#FDE725")
ggplot(
  data = combined_df4,
  mapping = aes(x = Cluster, y = Value, fill = Cluster)
) +
  geom_boxplot() +
  scale_fill_manual(values = custom_colors) +
  theme_minimal() +
  labs(
    title = "Physicians (per 1,000 people)",
    x = "",
    y = ""
  )
WHO
# Imputed (unscaled) indicators with the country names as the first column
Indicators_df <- cbind(Country1 = Country1, imp_dataset_1)
# Hand-written country lists, one per WHO region, and the matching rows of
# Indicators_df
WHOnames1 <- c(
  "Australia", "Cambodia", "China", "Lao PDR", "Myanmar", "Thailand",
  "Viet Nam"
)
Western_Pacific <- Indicators_df[Indicators_df$Country1 %in% WHOnames1, ]
WHOnames2 <- c("Bangladesh", "India", "Indonesia", "Nepal")
South_East_Asia <- Indicators_df[Indicators_df$Country1 %in% WHOnames2, ]
# NOTE(review): WHO lists Djibouti under its Eastern Mediterranean Region -
# confirm that grouping it with Africa is intended.
WHOnames3 <- c("Djibouti", "Nigeria")
Africa <- Indicators_df[Indicators_df$Country1 %in% WHOnames3, ]
WHOnames4 <- c("Egypt, Arab Rep.", "Iraq", "Pakistan")
Eastern_Mediterranean <- Indicators_df[Indicators_df$Country1 %in% WHOnames4, ]
WHOnames5 <- c("Azerbaijan", "Spain", "United Kingdom", "Turkiye")
European <- Indicators_df[Indicators_df$Country1 %in% WHOnames5, ]
WHOnames6 <- c("Canada", "United States", "Ecuador", "Chile")
Americas <- Indicators_df[Indicators_df$Country1 %in% WHOnames6, ]
#Compare summary statistics across the 4 clusters
# Function to summarize a data frame
summary_stats <- function(df, name) {
df %>%
summarise(across(everything(), list(mean = mean, sd = sd, min = min, max = max))) %>%
mutate(DataFrame = name) # Add a column for dataset name
}
# Apply function to each data frame
Western_Pacific_summary <- summary_stats(Western_Pacific[-1], "Western_Pacific")
South_East_Asia_summary <- summary_stats(South_East_Asia[-1], "South_East_Asia")
Africa_summary <- summary_stats(Africa[-1], "Africa")
Eastern_Mediterranean_summary <- summary_stats(Eastern_Mediterranean[-1], "Eastern_Mediterranean")
European_summary <- summary_stats(European[-1], "European")
Americas_summary <- summary_stats(Americas[-1], "Americas")
# Combine all summaries
WHOcomparison <- bind_rows(Western_Pacific_summary, South_East_Asia_summary, Africa_summary, Eastern_Mediterranean_summary,European_summary, Americas_summary)
# Reorder columns to move "DataFrame" to the first position
WHOcomparison <- WHOcomparison %>% select(DataFrame, everything())
# Print the summary comparison
print(WHOcomparison)
## DataFrame HealthExp_PercGDP_mean HealthExp_PercGDP_sd
## 1 Western_Pacific 4.981070 2.2096588
## 2 South_East_Asia 3.247444 0.9578965
## 3 Africa 3.474284 0.3604135
## 4 Eastern_Mediterranean 3.621417 1.0595539
## 5 European 6.658399 3.1377497
## 6 Americas 10.255596 4.1883552
## HealthExp_PercGDP_min HealthExp_PercGDP_max GDP_per_Capita_mean
## 1 2.752456 9.099192 10003.556
## 2 2.232583 4.474737 1661.456
## 3 3.219434 3.729135 1968.466
## 4 2.573061 4.691825 2728.457
## 5 3.322632 9.763403 21841.039
## 6 6.694435 16.068211 29778.765
## GDP_per_Capita_sd GDP_per_Capita_min GDP_per_Capita_max
## 1 18556.95588 965.5861 51793.947
## 2 991.65541 791.5463 3082.969
## 3 56.09478 1928.8009 2008.131
## 4 1633.79708 1187.5561 4441.516
## 5 17710.56877 4964.7631 43234.431
## 6 24751.26287 5044.3379 55979.989
## UNDP_PovRatio_PercPop_mean UNDP_PovRatio_PercPop_sd UNDP_PovRatio_PercPop_min
## 1 12.15714 14.4980294 0.6
## 2 15.52500 8.7412337 3.6
## 3 38.30000 7.4953319 33.0
## 4 17.36667 18.2083314 5.2
## 5 0.50000 0.2160247 0.3
## 6 1.22500 1.2526638 0.1
## UNDP_PovRatio_PercPop_max Population_Rural_mean Population_Rural_sd
## 1 38.3 113327375 233082845
## 2 24.6 280343978 396776416
## 3 43.6 48387682 68116921
## 4 38.3 66390726 62508037
## 5 0.8 11557405 6963589
## 6 2.5 18328584 26888926
## Population_Rural_min Population_Rural_max PrevHIV_PercPop1549_mean
## 1 3335954 639794213 0.7285714
## 2 22682431 871947247 0.2225000
## 3 221745 96553618 5.9050000
## 4 10577103 133931920 0.1066667
## 5 4244328 20945004 0.2237500
## 6 2262842 58560038 0.3362500
## PrevHIV_PercPop1549_sd PrevHIV_PercPop1549_min PrevHIV_PercPop1549_max
## 1 0.51469015 0.1 1.550
## 2 0.10507934 0.1 0.315
## 3 8.20950973 0.1 11.710
## 4 0.01154701 0.1 0.120
## 5 0.14704733 0.1 0.390
## 6 0.09177645 0.2 0.400
## IncTB_Per100k_mean IncTB_Per100k_sd IncTB_Per100k_min IncTB_Per100k_max
## 1 218.29660 161.54107 6.361905 454.00000
## 2 276.36905 52.14154 221.000000 341.90476
## 3 292.90476 104.51712 219.000000 366.80952
## 4 108.93651 142.19426 15.904762 272.61905
## 5 31.95714 32.44352 11.633333 80.14286
## 6 17.69286 19.01307 3.833333 44.90476
## PrevDiab_PercPop2079_mean PrevDiab_PercPop2079_sd PrevDiab_PercPop2079_min
## 1 6.628571 1.972278 4.65
## 2 8.912500 2.628490 6.15
## 3 5.525000 1.873833 4.20
## 4 16.000000 5.291266 9.90
## 5 7.362500 3.066316 4.20
## 6 8.437500 2.184176 5.50
## PrevDiab_PercPop2079_max Hospital_beds_1000ppl_mean Hospital_beds_1000ppl_sd
## 1 9.70 1.9782735 1.1645089
## 2 12.35 0.8540299 0.5873647
## 3 6.85 0.9530769 0.6407475
## 4 19.35 1.0748337 0.5412768
## 5 11.20 3.4962874 1.1916561
## 6 10.15 2.3642032 0.6819519
## Hospital_beds_1000ppl_min Hospital_beds_1000ppl_max Physicians_1000ppl_mean
## 1 0.6855556 3.837692 1.0406912
## 2 0.2557895 1.640556 0.5404641
## 3 0.5000000 1.406154 0.2734545
## 4 0.4747059 1.526111 0.7575937
## 5 2.6466667 5.260588 2.8913920
## 6 1.4731579 2.963333 2.2653099
## Physicians_1000ppl_sd Physicians_1000ppl_min Physicians_1000ppl_max
## 1 1.09685808 0.2154286 3.2975556
## 2 0.17637925 0.3476000 0.7025000
## 3 0.12226519 0.1870000 0.3599091
## 4 0.05608726 0.7147143 0.8210667
## 5 0.92379764 1.6875556 3.7839444
## 6 0.59374917 1.7747895 3.0857500
## Crude_Death_rate_Per1000_mean Crude_Death_rate_Per1000_sd
## 1 7.128864 1.0685430
## 2 7.075950 0.7690557
## 3 11.722625 3.6610100
## 4 6.347917 0.9727937
## 5 7.381312 1.9596937
## 6 6.766975 1.5319842
## Crude_Death_rate_Per1000_min Crude_Death_rate_Per1000_max
## 1 6.30765 9.26200
## 2 6.00645 7.77260
## 3 9.13390 14.31135
## 4 5.61760 7.45220
## 5 5.34025 9.28000
## 6 5.05600 8.55190
## Rural_Land_Area_Sq_Km_mean Rural_Land_Area_Sq_Km_sd Rural_Land_Area_Sq_Km_min
## 1 2604167.8 3829948.5 176180.68
## 2 1249192.7 1395250.7 79328.86
## 3 451798.2 608432.1 21571.77
## 4 742399.1 277117.7 434269.61
## 5 386243.0 298274.8 82218.65
## 6 4769261.3 4948292.9 253213.92
## Rural_Land_Area_Sq_Km_max
## 1 8723723.1
## 2 2956471.3
## 3 882024.7
## 4 971206.2
## 5 754924.7
## 6 9197138.5
WORLD BANK
# Put the country names in front of the imputed indicators so that rows can be
# selected by country name
Indicators_df <- cbind(Country1 = Country1, imp_dataset_1)

# Country lists, one per World Bank income group
WBnames1 <- c("Australia", "Canada", "Spain", "United Kingdom", "United States", "Chile")
WBnames2 <- c(
  "Azerbaijan", "China", "Turkiye", "Thailand", "Indonesia", "Iraq",
  "Ecuador", "Egypt, Arab Rep.", "Viet Nam"
)
WBnames3 <- c(
  "Bangladesh", "India", "Nigeria", "Pakistan", "Myanmar", "Lao PDR",
  "Nepal", "Cambodia", "Djibouti"
)

# Indicator rows of each income group; a name that does not occur in Country1
# matches nothing and is silently left out
High_income_countries <- subset(Indicators_df, Country1 %in% WBnames1)
Upper_Middle_income_countries <- subset(Indicators_df, Country1 %in% WBnames2)
Lower_Middle_income_countries <- subset(Indicators_df, Country1 %in% WBnames3)

# Compare summary statistics across the three income groups

# Collapse a data frame to a single row holding mean, sd, min and max of every
# column (named <column>_<statistic>), plus a DataFrame column carrying `name`
summary_stats <- function(df, name) {
  stat_fns <- list(mean = mean, sd = sd, min = min, max = max)
  stats_row <- summarise(df, across(everything(), stat_fns))
  mutate(stats_row, DataFrame = name)
}

# One summary row per income group; [-1] drops the Country1 column first
High_income_countries_summary <- summary_stats(
  High_income_countries[-1], "High_income_countries"
)
Upper_Middle_income_countries_summary <- summary_stats(
  Upper_Middle_income_countries[-1], "Upper_Middle_income_countries"
)
Lower_Middle_income_countries_summary <- summary_stats(
  Lower_Middle_income_countries[-1], "Lower_Middle_income_countries"
)

# Stack the group rows into one comparison table
WBcomparison <- bind_rows(
  High_income_countries_summary,
  Upper_Middle_income_countries_summary,
  Lower_Middle_income_countries_summary
)

# Bring the group label to the first column
WBcomparison <- relocate(WBcomparison, DataFrame)

# Show the comparison table
print(WBcomparison)
## DataFrame HealthExp_PercGDP_mean HealthExp_PercGDP_sd
## 1 High_income_countries 10.342491 2.946684
## 2 Upper_Middle_income_countries 4.303142 1.133532
## 3 Lower_Middle_income_countries 3.615872 1.330347
## HealthExp_PercGDP_min HealthExp_PercGDP_max GDP_per_Capita_mean
## 1 7.783749 16.068211 39730.799
## 2 2.829826 6.694435 4919.623
## 3 2.232583 6.688282 1401.204
## GDP_per_Capita_sd GDP_per_Capita_min GDP_per_Capita_max
## 1 16110.8137 12636.4768 55979.989
## 2 2354.6114 2247.1656 9879.269
## 3 404.8989 791.5463 2008.131
## UNDP_PovRatio_PercPop_mean UNDP_PovRatio_PercPop_sd UNDP_PovRatio_PercPop_min
## 1 0.7333333 0.8914408 0.1
## 2 3.0000000 2.6758176 0.3
## 3 27.9333333 10.5524879 16.4
## UNDP_PovRatio_PercPop_max Population_Rural_mean Population_Rural_sd
## 1 2.5 15312917 21476857
## 2 8.6 106279662 203494638
## 3 43.6 142418285 277913443
## Population_Rural_min Population_Rural_max PrevHIV_PercPop1549_mean
## 1 2262842 58560038 0.2950000
## 2 4244328 639794213 0.4583333
## 3 221745 871947247 1.6072222
## PrevHIV_PercPop1549_sd PrevHIV_PercPop1549_min PrevHIV_PercPop1549_max
## 1 0.1213260 0.1 0.40
## 2 0.5237485 0.1 1.55
## 3 3.7999688 0.1 11.71
## IncTB_Per100k_mean IncTB_Per100k_sd IncTB_Per100k_min IncTB_Per100k_max
## 1 9.620635 5.154079 3.833333 16.61905
## 2 112.153439 110.671043 15.904762 341.90476
## 3 297.518519 89.035352 202.476191 454.00000
## PrevDiab_PercPop2079_mean PrevDiab_PercPop2079_sd PrevDiab_PercPop2079_min
## 1 8.133333 1.795736 5.75
## 2 8.927778 4.427902 4.20
## 3 8.350000 4.838647 4.20
## PrevDiab_PercPop2079_max Hospital_beds_1000ppl_mean Hospital_beds_1000ppl_sd
## 1 10.15 2.9832070 0.5274749
## 2 18.75 2.2479003 1.3237900
## 3 19.35 0.8562563 0.4728172
## Hospital_beds_1000ppl_min Hospital_beds_1000ppl_max Physicians_1000ppl_mean
## 1 2.1942105 3.837692 2.8227172
## 2 0.9026316 5.260588 1.2952329
## 3 0.2557895 1.640556 0.4777548
## Physicians_1000ppl_sd Physicians_1000ppl_min Physicians_1000ppl_max
## 1 0.7207557 1.774789 3.7839444
## 2 0.9734026 0.347600 3.4116471
## 3 0.2242525 0.187000 0.8210667
## Crude_Death_rate_Per1000_mean Crude_Death_rate_Per1000_sd
## 1 7.781150 1.304549
## 2 6.200144 0.849277
## 3 8.311089 2.503344
## Crude_Death_rate_Per1000_min Crude_Death_rate_Per1000_max
## 1 6.05000 9.28000
## 2 5.05600 7.77260
## 3 6.00645 14.31135
## Rural_Land_Area_Sq_Km_mean Rural_Land_Area_Sq_Km_sd Rural_Land_Area_Sq_Km_min
## 1 4530346.3 4473203.4 218902.76
## 2 1536657.0 2744770.5 82218.65
## 3 662973.9 920908.6 21571.77
## Rural_Land_Area_Sq_Km_max
## 1 9197138
## 2 8723723
## 3 2956471
UN
# Put the country names in front of the imputed indicators so that rows can be
# selected by country name
Indicators_df <- imp_dataset_1
Indicators_df <- cbind(Country1 = Country1, Indicators_df)

# Country lists, one per UN region. Each region gets its own vector
# (UNnames1..UNnames5) so that no list is overwritten by a later one.
UNnames1 <- c(
  "Azerbaijan", "China", "Cambodia", "Indonesia", "Lao PDR", "Myanmar",
  "Thailand", "Viet Nam", "Bangladesh", "India", "Nepal", "Pakistan",
  "Iraq", "Turkiye"
)
Asia <- Indicators_df[Indicators_df$Country1 %in% UNnames1, ]
UNnames2 <- c("Egypt, Arab Rep.", "Djibouti", "Nigeria")
Afric <- Indicators_df[Indicators_df$Country1 %in% UNnames2, ]
UNnames3 <- c("Spain", "United Kingdom")
Europe <- Indicators_df[Indicators_df$Country1 %in% UNnames3, ]
UNnames4 <- c("Canada", "United States", "Ecuador", "Chile")
America <- Indicators_df[Indicators_df$Country1 %in% UNnames4, ]
UNnames5 <- c("Australia")
Ocean <- Indicators_df[Indicators_df$Country1 %in% UNnames5, ]

# Compare summary statistics across the five UN regions

# Collapse a data frame to a single row holding mean, sd, min and max of every
# column (named <column>_<statistic>), plus a DataFrame column carrying `name`
summary_stats <- function(df, name) {
  df %>%
    summarise(across(everything(), list(mean = mean, sd = sd, min = min, max = max))) %>%
    mutate(DataFrame = name) # Add a column for dataset name
}

# One summary row per region; [-1] drops the Country1 column first.
# Ocean is built from a single country name, so every *_sd value in its row
# is NA (sd() of one observation).
Asia_summary <- summary_stats(Asia[-1], "Asia")
Afric_summary <- summary_stats(Afric[-1], "Afric")
Europe_summary <- summary_stats(Europe[-1], "Europe")
America_summary <- summary_stats(America[-1], "America")
Ocean_summary <- summary_stats(Ocean[-1], "Ocean")

# Combine all summaries
UNcomparison <- bind_rows(Asia_summary, Afric_summary, Europe_summary, America_summary, Ocean_summary)

# Reorder columns to move "DataFrame" to the first position
UNcomparison <- UNcomparison %>% select(DataFrame, everything())

# Print the summary comparison
print(UNcomparison)
## DataFrame HealthExp_PercGDP_mean HealthExp_PercGDP_sd HealthExp_PercGDP_min
## 1 Asia 3.781164 1.1602748 2.232583
## 2 Afric 3.880131 0.7477191 3.219434
## 3 Europe 9.313902 0.6356892 8.864402
## 4 America 10.255596 4.1883552 6.694435
## 5 Ocean 9.099192 NA 9.099192
## HealthExp_PercGDP_max GDP_per_Capita_mean GDP_per_Capita_sd
## 1 6.688282 3239.277 2696.5217
## 2 4.691825 2164.410 341.6951
## 3 9.763403 36260.063 9863.2459
## 4 16.068211 29778.765 24751.2629
## 5 9.099192 51793.947 NA
## GDP_per_Capita_min GDP_per_Capita_max UNDP_PovRatio_PercPop_mean
## 1 791.5463 9879.269 13.89286
## 2 1928.8009 2556.298 27.26667
## 3 29285.6946 43234.431 0.45000
## 4 5044.3379 55979.989 1.22500
## 5 51793.9466 51793.947 0.70000
## UNDP_PovRatio_PercPop_sd UNDP_PovRatio_PercPop_min UNDP_PovRatio_PercPop_max
## 1 13.38193181 0.3 38.3
## 2 19.83162458 5.2 43.6
## 3 0.07071068 0.4 0.5
## 4 1.25266383 0.1 2.5
## 5 NA 0.7 0.7
## Population_Rural_mean Population_Rural_sd Population_Rural_min
## 1 148644995 264821191 4244328
## 2 50479506 48302015 221745
## 3 10520144 1309503 9594186
## 4 18328584 26888926 2262842
## 5 3335954 NA 3335954
## Population_Rural_max PrevHIV_PercPop1549_mean PrevHIV_PercPop1549_sd
## 1 871947247 0.4507143 0.45797992
## 2 96553618 3.9700000 6.70303663
## 3 11446103 0.3475000 0.06010408
## 4 58560038 0.3362500 0.09177645
## 5 3335954 0.1000000 NA
## PrevHIV_PercPop1549_min PrevHIV_PercPop1549_max IncTB_Per100k_mean
## 1 0.100 1.55 217.173469
## 2 0.100 11.71 200.571429
## 3 0.305 0.39 12.747619
## 4 0.200 0.40 17.692857
## 5 0.100 0.10 6.361905
## IncTB_Per100k_sd IncTB_Per100k_min IncTB_Per100k_max
## 1 131.904367 22.190476 454.000000
## 2 176.176750 15.904762 366.809524
## 3 1.575838 11.633333 13.861905
## 4 19.013070 3.833333 44.904762
## 5 NA 6.361905 6.361905
## PrevDiab_PercPop2079_mean PrevDiab_PercPop2079_sd PrevDiab_PercPop2079_min
## 1 8.585714 4.025134 4.20
## 2 9.933333 7.749570 4.20
## 3 7.025000 1.803122 5.75
## 4 8.437500 2.184176 5.50
## 5 6.500000 NA 6.50
## PrevDiab_PercPop2079_max Hospital_beds_1000ppl_mean Hospital_beds_1000ppl_sd
## 1 19.35 1.645142 1.33780880
## 2 18.75 1.144088 0.56101226
## 3 8.30 3.038947 0.08485281
## 4 10.15 2.364203 0.68195187
## 5 6.50 3.837692 NA
## Hospital_beds_1000ppl_min Hospital_beds_1000ppl_max Physicians_1000ppl_mean
## 1 0.2557895 5.260588 0.9147435
## 2 0.5000000 1.526111 0.4205411
## 3 2.9789474 3.098947 3.2331827
## 4 1.4731579 2.963333 2.2653099
## 5 3.8376923 3.837692 3.2975556
## Physicians_1000ppl_sd Physicians_1000ppl_min Physicians_1000ppl_max
## 1 0.8402214 0.2154286 3.4116471
## 2 0.2690312 0.1870000 0.7147143
## 3 0.7788947 2.6824211 3.7839444
## 4 0.5937492 1.7747895 3.0857500
## 5 NA 3.2975556 3.2975556
## Crude_Death_rate_Per1000_mean Crude_Death_rate_Per1000_sd
## 1 6.866136 1.0392442
## 2 9.806400 4.2091866
## 3 9.050000 0.3252691
## 4 6.766975 1.5319842
## 5 6.575000 NA
## Crude_Death_rate_Per1000_min Crude_Death_rate_Per1000_max
## 1 5.34025 9.26200
## 2 5.97395 14.31135
## 3 8.82000 9.28000
## 4 5.05600 8.55190
## 5 6.57500 6.57500
## Rural_Land_Area_Sq_Km_mean Rural_Land_Area_Sq_Km_sd Rural_Land_Area_Sq_Km_min
## 1 1262047.3 2290076.7 79328.86
## 2 624934.2 524426.4 21571.77
## 3 353914.2 190935.1 218902.76
## 4 4769261.3 4948292.9 253213.92
## 5 7650418.1 NA 7650418.08
## Rural_Land_Area_Sq_Km_max
## 1 8723723.1
## 2 971206.2
## 3 488925.7
## 4 9197138.5
## 5 7650418.1
Bubble plots - H5N1 Countries
library(ggplot2)
# Indicator rows for the 24 countries listed for the H5N1 bubble plot
AvianCountriesDF <- subset(
  Indicators_df,
  Country1 %in% c(
    "Australia", "Canada", "Spain", "United Kingdom", "United States",
    "Bangladesh", "Cambodia", "China", "Djibouti", "India", "Indonesia",
    "Lao PDR", "Myanmar", "Nepal", "Nigeria", "Pakistan", "Thailand",
    "Viet Nam", "Azerbaijan", "Ecuador", "Egypt, Arab Rep.", "Iraq",
    "Turkiye", "Chile"
  )
)

# Round both densities to whole numbers: hospital beds become a small set of
# discrete colour classes, physicians drive the bubble size
AvianCountriesDF$Hospital_beds_1000ppl <- round(AvianCountriesDF$Hospital_beds_1000ppl)
AvianCountriesDF$Physicians_1000ppl <- round(AvianCountriesDF$Physicians_1000ppl)

# Bubble plot: one point per country
ggplot(
  data = AvianCountriesDF,
  mapping = aes(
    x = HealthExp_PercGDP,
    y = UNDP_PovRatio_PercPop,
    size = Physicians_1000ppl,
    color = as.factor(Hospital_beds_1000ppl)
  )
) +
  geom_point(alpha = 0.6) +
  scale_size_continuous(range = c(2, 12)) +
  theme_minimal() +
  labs(
    title = "Health Expenditure (% GDP) vs UNDP Poverty Ratio (% Population)",
    x = "Health Expenditure (% GDP)",
    y = "UNDP Poverty Ratio (% Population)",
    size = "Physicians per 1000 people",
    color = "Hospital beds per 1000 people"
  )
Bubble plots - H5N1 Clusters
library(ggplot2)
library(ggrepel)
library(dplyr)
# Cluster-level table for plotting: the DataFrame column becomes Cluster, its
# "Cluster1".."Cluster4" labels are recoded to "Cluster 0".."Cluster 3", and
# the two density means are rounded to whole numbers
AvianClustersDF <- comparison %>%
  rename(Cluster = DataFrame) %>%
  mutate(
    Cluster = dplyr::recode_factor(
      Cluster,
      "Cluster1" = "Cluster 0",
      "Cluster2" = "Cluster 1",
      "Cluster3" = "Cluster 2",
      "Cluster4" = "Cluster 3"
    ),
    Hospital_beds_1000ppl_mean = round(Hospital_beds_1000ppl_mean),
    Physicians_1000ppl_mean = round(Physicians_1000ppl_mean)
  )

# Bubble plot: one labelled point per cluster
ggplot(
  data = AvianClustersDF,
  mapping = aes(
    x = HealthExp_PercGDP_mean,
    y = UNDP_PovRatio_PercPop_mean,
    size = Physicians_1000ppl_mean,
    color = as.factor(Hospital_beds_1000ppl_mean)
  )
) +
  geom_point(alpha = 0.6) +
  # Cluster name above each bubble; kept out of the legends
  geom_text(
    aes(label = Cluster),
    vjust = -1.5, size = 4, fontface = "bold", show.legend = FALSE
  ) +
  scale_size_continuous(range = c(3, 13)) +
  # Draw the colour legend keys as solid circles of a fixed size
  scale_color_discrete(
    guide = guide_legend(override.aes = list(shape = 16, size = 5))
  ) +
  labs(
    title = "Health Expenditure (% GDP) versus UNDP Poverty Ratio (% population)",
    x = "Health Expenditure (% GDP)",
    y = "UNDP Poverty Ratio (% population)",
    size = "Physicians \nper 1000 people",
    color = "Hospital Beds \nper 1000 people"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "right",
    plot.title = element_text(face = "bold", hjust = 0.5)
  )
library(ggplot2)
library(ggrepel)
library(dplyr)
# Cluster-level table for plotting: the DataFrame column becomes Cluster, its
# "Cluster1".."Cluster4" labels are recoded to "Cluster 0".."Cluster 3", and
# GDP per capita and the crude death rate are rounded to whole numbers
AvianClustersDF1 <- comparison %>%
  rename(Cluster = DataFrame) %>%
  mutate(
    Cluster = dplyr::recode_factor(
      Cluster,
      "Cluster1" = "Cluster 0",
      "Cluster2" = "Cluster 1",
      "Cluster3" = "Cluster 2",
      "Cluster4" = "Cluster 3"
    ),
    GDP_per_Capita_mean = round(GDP_per_Capita_mean),
    Crude_Death_rate_Per1000_mean = round(Crude_Death_rate_Per1000_mean)
  )

# Bubble plot: one labelled point per cluster
ggplot(
  data = AvianClustersDF1,
  mapping = aes(
    x = PrevHIV_PercPop1549_mean,
    y = IncTB_Per100k_mean,
    size = GDP_per_Capita_mean,
    color = as.factor(Crude_Death_rate_Per1000_mean)
  )
) +
  geom_point(alpha = 0.6) +
  # Cluster name above each bubble; kept out of the legends
  geom_text(
    aes(label = Cluster),
    vjust = -1.5, size = 4, fontface = "bold", show.legend = FALSE
  ) +
  scale_size_continuous(range = c(3, 13)) +
  # Draw the colour legend keys as solid circles of a fixed size
  scale_color_discrete(
    guide = guide_legend(override.aes = list(shape = 16, size = 5))
  ) +
  labs(
    title = "HIV Prevalence in 15 - 49 Age Group (% population) versus \nTB Incidence per 100,000 people",
    x = "HIV Prevalence in 15 - 49 age group (% population)",
    y = "TB Incidence per 100,000 people",
    size = "GDP per capita",
    color = "Crude Death Rate \nper 1000 people"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "right",
    plot.title = element_text(face = "bold", hjust = 0.5)
  )
Plot of CFRs
library(ggplot2)
library(ggthemes) # optional, for themes
# CFR estimates (%) with their lower and upper interval bounds: one row per
# group, rows ordered by grouping scheme (Category)
cfr_data <- data.frame(
  Group = c(
    # Study clusters
    "Cluster 0", "Cluster 1", "Cluster 2", "Cluster 3",
    # WHO regions
    "Western Pacific", "South East Asia", "Africa (WHO)",
    "Eastern Mediterranean", "European (WHO)", "Americas (WHO)",
    # World Bank income groups
    "High Income", "Upper Middle Income", "Lower Middle Income",
    # UN regions
    "Asia (UN)", "Africa (UN)", "Europe (UN)", "Americas (UN)", "Oceania",
    # All groups pooled
    "Overall"
  ),
  CFR = c(
    8.25, 66.22, 34.26, 29.29,
    55.38, 81.23, 50.00, 33.76, 34.12, 11.70,
    7.86, 52.08, 54.87,
    65.48, 33.58, 8.30, 11.70, 29.29,
    51.33
  ),
  Lower = c(
    1.23, 62.01, 29.63, 1.26,
    49.61, 75.61, 9.43, 29.04, 18.64, 1.78,
    1.17, 48.60, 44.70,
    61.34, 28.85, 0.32, 1.78, 1.26,
    48.07
  ),
  Upper = c(
    24.87, 70.27, 39.10, 84.19,
    61.05, 86.10, 90.57, 38.70, 52.35, 33.87,
    23.82, 55.54, 64.78,
    69.47, 38.54, 36.94, 33.87, 84.19,
    54.57
  ),
  # Scheme label repeated once per row of that scheme
  Category = rep(
    c("Study Clusters", "WHO Regions", "World Bank", "UN Regions", "Overall"),
    times = c(4, 6, 3, 5, 1)
  )
)
library(ggplot2)
library(viridis)
# Fix the order of the grouping schemes (used for the colour legend)
cfr_data$Category <- factor(
  cfr_data$Category,
  levels = c("Study Clusters", "WHO Regions", "World Bank", "UN Regions", "Overall")
)

# Fix the order of the groups along the axis: the explicit level list keeps
# the groups of each scheme together. (An earlier re-levelling by Category and
# descending CFR was overwritten by this assignment and has been removed.)
cfr_data$Group <- factor(cfr_data$Group, levels = c(
  "Cluster 0", "Cluster 1", "Cluster 2", "Cluster 3",
  "Western Pacific", "South East Asia", "Africa (WHO)", "Eastern Mediterranean",
  "European (WHO)", "Americas (WHO)",
  "High Income", "Upper Middle Income", "Lower Middle Income",
  "Asia (UN)", "Africa (UN)", "Europe (UN)", "Americas (UN)", "Oceania",
  "Overall"
))

# Point estimate with an error bar from Lower to Upper for every group,
# coloured by grouping scheme; coord_flip() puts the groups on the vertical axis
ggplot(cfr_data, aes(x = Group, y = CFR, color = Category)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = Lower, ymax = Upper), width = 0.3, alpha = 0.7, linewidth = 1.2) +
  # CFR value printed next to each point, one decimal place
  geom_text(
    aes(label = sprintf("%.1f%%", CFR)),
    hjust = 0.5, vjust = -0.7, size = 3.2, color = "black"
  ) +
  coord_flip() +
  labs(
    title = "Case Fatality Rate (CFR) by Cluster with 95% Credible Intervals",
    x = "",
    y = "CFR (%)",
    color = "Clustering Scheme:"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "top",
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank()
  ) +
  scale_color_viridis_d(option = "C")
Forest plot of CFRs
suppressMessages(library(tidyverse))
library(ggthemes)
# CFR estimates (%) with their lower and upper interval bounds: one row per
# group, rows ordered by grouping scheme (Category)
cfr_data <- data.frame(
  Group = c(
    # Study clusters
    "Cluster 0", "Cluster 1", "Cluster 2", "Cluster 3",
    # WHO regions
    "Western Pacific", "South East Asia", "Africa (WHO)",
    "Eastern Mediterranean", "European (WHO)", "Americas (WHO)",
    # World Bank income groups
    "High Income", "Upper Middle Income", "Lower Middle Income",
    # UN regions
    "Asia (UN)", "Africa (UN)", "Europe (UN)", "Americas (UN)", "Oceania",
    # All groups pooled
    "Overall"
  ),
  CFR = c(
    8.25, 66.22, 34.26, 29.29,
    55.38, 81.23, 50.00, 33.76, 34.12, 11.70,
    7.86, 52.08, 54.87,
    65.48, 33.58, 8.30, 11.70, 29.29,
    51.33
  ),
  Lower = c(
    1.23, 62.01, 29.63, 1.26,
    49.61, 75.61, 9.43, 29.04, 18.64, 1.78,
    1.17, 48.60, 44.70,
    61.34, 28.85, 0.32, 1.78, 1.26,
    48.07
  ),
  Upper = c(
    24.87, 70.27, 39.10, 84.19,
    61.05, 86.10, 90.57, 38.70, 52.35, 33.87,
    23.82, 55.54, 64.78,
    69.47, 38.54, 36.94, 33.87, 84.19,
    54.57
  ),
  # Scheme label repeated once per row of that scheme
  Category = rep(
    c("Study Clusters", "WHO Regions", "World Bank", "UN Regions", "Overall"),
    times = c(4, 6, 3, 5, 1)
  )
)
# Format labels
# variable: row position, used as the numeric axis coordinate of each group
# estext:   "CFR (Lower, Upper)" text for the secondary axis. sprintf() keeps
#           two decimals everywhere, so 50 prints as "50.00" rather than "50"
#           and all labels have the same width of precision
cfr_data <- cfr_data %>%
  mutate(
    variable = row_number(),
    estext = sprintf("%.2f (%.2f, %.2f)", CFR, Lower, Upper)
  )
# Custom theme: ggthemes::theme_fivethirtyeight() with white backgrounds,
# a grey panel border, grey facet strips with bold white text, visible axis
# titles and thin dotted major grid lines.
#   base_size, base_family: passed on to theme_fivethirtyeight()
theme_ham <- function(base_size = 12, base_family = "sans"){
  white_fill <- ggplot2::element_rect(fill='white', colour='white')
  overrides <- ggplot2::theme(
    # backgrounds
    plot.background = white_fill,
    panel.background = white_fill,
    legend.background = ggplot2::element_blank(),
    # panels
    panel.spacing = ggplot2::unit(1.5, 'lines'),
    panel.border = ggplot2::element_rect(color = "grey50", fill = NA, linewidth = 1, linetype = 1),
    panel.grid.major = ggplot2::element_line(linetype = 3, linewidth = 0.2),
    # facet strips
    strip.background = ggplot2::element_rect(colour = "grey50", fill = "grey50"),
    strip.text = ggplot2::element_text(colour = 'white', face = 'bold'),
    # restore the axis titles
    axis.title = ggplot2::element_text()
  )
  ggthemes::theme_fivethirtyeight(base_size, base_family) + overrides
}
# Forest plot: one row per group, positioned by its row number (variable).
# The primary axis shows the group names, the secondary axis the estimate text.
ggplot(data = cfr_data, mapping = aes(x = variable)) +
  # reference line at 0 %
  geom_hline(yintercept = 0) +
  # point estimate (open diamond)
  geom_point(aes(y = CFR), size = 2, color = "black", shape = 5) +
  # interval from Lower to Upper
  geom_linerange(aes(ymin = Lower, ymax = Upper), linewidth = 0.5, color = "black") +
  scale_x_continuous(
    breaks = cfr_data$variable,
    labels = cfr_data$Group,
    sec.axis = sec_axis(~., breaks = cfr_data$variable, labels = cfr_data$estext)
  ) +
  # flip so the groups run down the page; half a unit of padding at both ends
  coord_flip(xlim = c(0.5, nrow(cfr_data) + 0.5)) +
  labs(y = "Case Fatality Rate (%)", x = "") +
  theme_ham()
library(tidyverse)
library(ggthemes)
library(viridis)
# CFR estimates (%) with their lower and upper interval bounds: one row per
# group, rows ordered by grouping scheme (Category)
cfr_data <- data.frame(
  Group = c(
    # Study clusters
    "Cluster 0", "Cluster 1", "Cluster 2", "Cluster 3",
    # WHO regions
    "Western Pacific", "South East Asia", "Africa (WHO)",
    "Eastern Mediterranean", "European (WHO)", "Americas (WHO)",
    # World Bank income groups
    "High Income", "Upper Middle Income", "Lower Middle Income",
    # UN regions
    "Asia (UN)", "Africa (UN)", "Europe (UN)", "Americas (UN)", "Oceania",
    # All groups pooled
    "Overall"
  ),
  CFR = c(
    8.25, 66.22, 34.26, 29.29,
    55.38, 81.23, 50.00, 33.76, 34.12, 11.70,
    7.86, 52.08, 54.87,
    65.48, 33.58, 8.30, 11.70, 29.29,
    51.33
  ),
  Lower = c(
    1.23, 62.01, 29.63, 1.26,
    49.61, 75.61, 9.43, 29.04, 18.64, 1.78,
    1.17, 48.60, 44.70,
    61.34, 28.85, 0.32, 1.78, 1.26,
    48.07
  ),
  Upper = c(
    24.87, 70.27, 39.10, 84.19,
    61.05, 86.10, 90.57, 38.70, 52.35, 33.87,
    23.82, 55.54, 64.78,
    69.47, 38.54, 36.94, 33.87, 84.19,
    54.57
  ),
  # Scheme label repeated once per row of that scheme
  Category = rep(
    c("Study Clusters", "WHO Regions", "World Bank Regions", "UN Regions", "Overall"),
    times = c(4, 6, 3, 5, 1)
  )
)
# Label columns and factor orderings for the faceted plot
library(stringr)
cfr_data <- mutate(
  cfr_data,
  # point-estimate text, one decimal place plus a percent sign
  estext = sprintf("%.1f%%", CFR),
  # group name without a trailing parenthesised tag such as (WHO) or (UN)
  Group_clean = str_remove(Group, "\\s*\\(.*\\)"),
  # order of the grouping schemes
  Category = factor(
    Category,
    levels = c(
      "UN Regions", "WHO Regions", "World Bank Regions",
      "Study Clusters", "Overall"
    )
  ),
  # levels in reverse row order
  Group = factor(Group, levels = rev(Group))
)
# Custom theme: ggthemes::theme_fivethirtyeight() with white backgrounds,
# a grey panel border, grey facet strips with bold white text, visible axis
# titles, thin dotted major grid lines and no legend.
#   base_size, base_family: passed on to theme_fivethirtyeight()
theme_ham <- function(base_size = 12, base_family = "sans"){
  white_fill <- element_rect(fill='white', colour='white')
  overrides <- theme(
    # backgrounds
    plot.background = white_fill,
    panel.background = white_fill,
    # panels
    panel.spacing.y = unit(0.5, 'lines'),
    panel.border = element_rect(color = "grey50", fill = NA, linewidth = 1),
    panel.grid.major = element_line(linetype = 3, linewidth = 0.2),
    # facet strips
    strip.background = element_rect(colour = "grey50", fill = "grey50"),
    strip.text = element_text(colour = 'white', face = 'bold'),
    # restore the axis titles, hide the legend
    axis.title = element_text(),
    legend.position = "none"
  )
  ggthemes::theme_fivethirtyeight(base_size, base_family) + overrides
}
# Lookup from each Group level to its cleaned display label
label_map <- as.character(cfr_data$Group_clean)
names(label_map) <- cfr_data$Group

# Forest plot with one facet row per grouping scheme
ggplot(data = cfr_data, mapping = aes(x = Group, y = CFR, colour = Category)) +
  # point estimate (open diamond) and its Lower-Upper interval
  geom_point(size = 2, shape = 5) +
  geom_linerange(aes(ymin = Lower, ymax = Upper), linewidth = 0.5) +
  # estimate text next to each point
  geom_text(
    aes(label = estext),
    hjust = 0.45, vjust = -0.5, size = 3, colour = "black"
  ) +
  # show the cleaned names on the axis instead of the raw Group values
  scale_x_discrete(labels = label_map) +
  scale_color_viridis_d(option = "C", end = 0.9) +
  coord_flip() +
  # each scheme gets its own panel, sized by the number of groups it holds
  facet_grid(
    rows = vars(Category),
    scales = "free_y", space = "free_y", switch = "y"
  ) +
  labs(y = "Case Fatality Rate (%)", x = "") +
  theme_ham() +
  theme(
    strip.placement = "outside",
    strip.text.y.left = element_text(angle = 0)
  )